16f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/*
26f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org******************************************************************************
36f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org*
46f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org*   Copyright (C) 1998-2012, International Business Machines
56f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org*   Corporation and others.  All Rights Reserved.
66f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org*
76f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org******************************************************************************
86f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org*
96f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org* File ustring.cpp
106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org*
116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org* Modification History:
126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org*
136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org*   Date        Name        Description
146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org*   12/07/98    bertrand    Creation.
156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org******************************************************************************
166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org*/
176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "unicode/utypes.h"
196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "unicode/putil.h"
206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "unicode/ustring.h"
216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "unicode/utf16.h"
226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "cstring.h"
236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "cwchar.h"
246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "cmemory.h"
256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "ustr_imp.h"
266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/* ANSI string.h - style functions ------------------------------------------ */
286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
/*
 * Highest code point of the Basic Multilingual Plane (U+FFFF); it is also the
 * largest value that a single 16-bit UChar can hold.
 */
#define U_BMP_MAX 0xFFFF
316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/* Forward binary string search functions ----------------------------------- */
336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/*
356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Test if a substring match inside a string is at code point boundaries.
366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * All pointers refer to the same buffer.
376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * The limit pointer may be NULL, all others must be real pointers.
386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */
396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgstatic inline UBool
406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgisMatchAtCPBoundary(const UChar *start, const UChar *match, const UChar *matchLimit, const UChar *limit) {
416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if(U16_IS_TRAIL(*match) && start!=match && U16_IS_LEAD(*(match-1))) {
426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        /* the leading edge of the match is in the middle of a surrogate pair */
436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return FALSE;
446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if(U16_IS_LEAD(*(matchLimit-1)) && match!=limit && U16_IS_TRAIL(*matchLimit)) {
466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        /* the trailing edge of the match is in the middle of a surrogate pair */
476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return FALSE;
486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    return TRUE;
506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgU_CAPI UChar * U_EXPORT2
536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgu_strFindFirst(const UChar *s, int32_t length,
546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org               const UChar *sub, int32_t subLength) {
556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    const UChar *start, *p, *q, *subLimit;
566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UChar c, cs, cq;
576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if(sub==NULL || subLength<-1) {
596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return (UChar *)s;
606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if(s==NULL || length<-1) {
626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return NULL;
636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    start=s;
666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if(length<0 && subLength<0) {
686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        /* both strings are NUL-terminated */
696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if((cs=*sub++)==0) {
706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            return (UChar *)s;
716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if(*sub==0 && !U16_IS_SURROGATE(cs)) {
736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            /* the substring consists of a single, non-surrogate BMP code point */
746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            return u_strchr(s, cs);
756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        while((c=*s++)!=0) {
786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if(c==cs) {
796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                /* found first substring UChar, compare rest */
806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                p=s;
816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                q=sub;
826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                for(;;) {
836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    if((cq=*q)==0) {
846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        if(isMatchAtCPBoundary(start, s-1, p, NULL)) {
856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                            return (UChar *)(s-1); /* well-formed match */
866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        } else {
876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                            break; /* no match because surrogate pair is split */
886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        }
896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    }
906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    if((c=*p)==0) {
916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        return NULL; /* no match, and none possible after s */
926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    }
936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    if(c!=cq) {
946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        break; /* no match */
956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    }
966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    ++p;
976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    ++q;
986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
1006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
1016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
1026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        /* not found */
1036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return NULL;
1046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
1056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
1066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if(subLength<0) {
1076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        subLength=u_strlen(sub);
1086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
1096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if(subLength==0) {
1106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return (UChar *)s;
1116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
1126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
1136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    /* get sub[0] to search for it fast */
1146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    cs=*sub++;
1156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    --subLength;
1166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    subLimit=sub+subLength;
1176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
1186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if(subLength==0 && !U16_IS_SURROGATE(cs)) {
1196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        /* the substring consists of a single, non-surrogate BMP code point */
1206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return length<0 ? u_strchr(s, cs) : u_memchr(s, cs, length);
1216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
1226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
1236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if(length<0) {
1246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        /* s is NUL-terminated */
1256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        while((c=*s++)!=0) {
1266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if(c==cs) {
1276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                /* found first substring UChar, compare rest */
1286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                p=s;
1296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                q=sub;
1306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                for(;;) {
1316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    if(q==subLimit) {
1326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        if(isMatchAtCPBoundary(start, s-1, p, NULL)) {
1336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                            return (UChar *)(s-1); /* well-formed match */
1346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        } else {
1356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                            break; /* no match because surrogate pair is split */
1366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        }
1376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    }
1386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    if((c=*p)==0) {
1396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        return NULL; /* no match, and none possible after s */
1406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    }
1416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    if(c!=*q) {
1426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        break; /* no match */
1436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    }
1446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    ++p;
1456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    ++q;
1466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
1476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
1486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
1496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    } else {
1506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        const UChar *limit, *preLimit;
1516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
1526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        /* subLength was decremented above */
1536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if(length<=subLength) {
1546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            return NULL; /* s is shorter than sub */
1556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
1566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
1576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        limit=s+length;
1586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
1596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        /* the substring must start before preLimit */
1606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        preLimit=limit-subLength;
1616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
1626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        while(s!=preLimit) {
1636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            c=*s++;
1646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if(c==cs) {
1656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                /* found first substring UChar, compare rest */
1666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                p=s;
1676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                q=sub;
1686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                for(;;) {
1696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    if(q==subLimit) {
1706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        if(isMatchAtCPBoundary(start, s-1, p, limit)) {
1716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                            return (UChar *)(s-1); /* well-formed match */
1726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        } else {
1736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                            break; /* no match because surrogate pair is split */
1746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        }
1756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    }
1766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    if(*p!=*q) {
1776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        break; /* no match */
1786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    }
1796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    ++p;
1806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    ++q;
1816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
1826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
1836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
1846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
1856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
1866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    /* not found */
1876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    return NULL;
1886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
1896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
1906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgU_CAPI UChar * U_EXPORT2
1916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgu_strstr(const UChar *s, const UChar *substring) {
1926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    return u_strFindFirst(s, -1, substring, -1);
1936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
1946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
1956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgU_CAPI UChar * U_EXPORT2
1966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgu_strchr(const UChar *s, UChar c) {
1976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if(U16_IS_SURROGATE(c)) {
1986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        /* make sure to not find half of a surrogate pair */
1996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return u_strFindFirst(s, -1, &c, 1);
2006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    } else {
2016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        UChar cs;
2026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
2036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        /* trivial search for a BMP code point */
2046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        for(;;) {
2056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if((cs=*s)==c) {
2066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                return (UChar *)s;
2076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
2086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if(cs==0) {
2096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                return NULL;
2106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
2116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            ++s;
2126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
2136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
2146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
2156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
2166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgU_CAPI UChar * U_EXPORT2
2176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgu_strchr32(const UChar *s, UChar32 c) {
2186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if((uint32_t)c<=U_BMP_MAX) {
2196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        /* find BMP code point */
2206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return u_strchr(s, (UChar)c);
2216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    } else if((uint32_t)c<=UCHAR_MAX_VALUE) {
2226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        /* find supplementary code point as surrogate pair */
2236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        UChar cs, lead=U16_LEAD(c), trail=U16_TRAIL(c);
2246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
2256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        while((cs=*s++)!=0) {
2266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if(cs==lead && *s==trail) {
2276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                return (UChar *)(s-1);
2286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
2296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
2306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return NULL;
2316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    } else {
2326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        /* not a Unicode code point, not findable */
2336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return NULL;
2346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
2356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
2366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
2376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgU_CAPI UChar * U_EXPORT2
2386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgu_memchr(const UChar *s, UChar c, int32_t count) {
2396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if(count<=0) {
2406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return NULL; /* no string */
2416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    } else if(U16_IS_SURROGATE(c)) {
2426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        /* make sure to not find half of a surrogate pair */
2436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return u_strFindFirst(s, count, &c, 1);
2446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    } else {
2456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        /* trivial search for a BMP code point */
2466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        const UChar *limit=s+count;
2476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        do {
2486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if(*s==c) {
2496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                return (UChar *)s;
2506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
2516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        } while(++s!=limit);
2526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return NULL;
2536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
2546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
2556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
2566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgU_CAPI UChar * U_EXPORT2
2576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgu_memchr32(const UChar *s, UChar32 c, int32_t count) {
2586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if((uint32_t)c<=U_BMP_MAX) {
2596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        /* find BMP code point */
2606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return u_memchr(s, (UChar)c, count);
2616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    } else if(count<2) {
2626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        /* too short for a surrogate pair */
2636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return NULL;
2646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    } else if((uint32_t)c<=UCHAR_MAX_VALUE) {
2656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        /* find supplementary code point as surrogate pair */
2666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        const UChar *limit=s+count-1; /* -1 so that we do not need a separate check for the trail unit */
2676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        UChar lead=U16_LEAD(c), trail=U16_TRAIL(c);
2686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
2696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        do {
2706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if(*s==lead && *(s+1)==trail) {
2716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                return (UChar *)s;
2726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
2736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        } while(++s!=limit);
2746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return NULL;
2756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    } else {
2766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        /* not a Unicode code point, not findable */
2776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return NULL;
2786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
2796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
2806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
2816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/* Backward binary string search functions ---------------------------------- */
2826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
2836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgU_CAPI UChar * U_EXPORT2
2846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgu_strFindLast(const UChar *s, int32_t length,
2856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org              const UChar *sub, int32_t subLength) {
2866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    const UChar *start, *limit, *p, *q, *subLimit;
2876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UChar c, cs;
2886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
2896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if(sub==NULL || subLength<-1) {
2906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return (UChar *)s;
2916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
2926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if(s==NULL || length<-1) {
2936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return NULL;
2946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
2956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
2966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    /*
2976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * This implementation is more lazy than the one for u_strFindFirst():
2986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * There is no special search code for NUL-terminated strings.
2996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * It does not seem to be worth it for searching substrings to
3006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * search forward and find all matches like in u_strrchr() and similar.
3016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * Therefore, we simply get both string lengths and search backward.
3026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     *
3036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * markus 2002oct23
3046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     */
3056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
3066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if(subLength<0) {
3076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        subLength=u_strlen(sub);
3086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
3096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if(subLength==0) {
3106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return (UChar *)s;
3116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
3126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
3136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    /* get sub[subLength-1] to search for it fast */
3146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    subLimit=sub+subLength;
3156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    cs=*(--subLimit);
3166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    --subLength;
3176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
3186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if(subLength==0 && !U16_IS_SURROGATE(cs)) {
3196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        /* the substring consists of a single, non-surrogate BMP code point */
3206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return length<0 ? u_strrchr(s, cs) : u_memrchr(s, cs, length);
3216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
3226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
3236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if(length<0) {
3246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        length=u_strlen(s);
3256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
3266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
3276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    /* subLength was decremented above */
3286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if(length<=subLength) {
3296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return NULL; /* s is shorter than sub */
3306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
3316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
3326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    start=s;
3336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    limit=s+length;
3346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
3356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    /* the substring must start no later than s+subLength */
3366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    s+=subLength;
3376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
3386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    while(s!=limit) {
3396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        c=*(--limit);
3406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if(c==cs) {
3416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            /* found last substring UChar, compare rest */
3426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            p=limit;
3436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            q=subLimit;
3446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            for(;;) {
3456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                if(q==sub) {
3466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    if(isMatchAtCPBoundary(start, p, limit+1, start+length)) {
3476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        return (UChar *)p; /* well-formed match */
3486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    } else {
3496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        break; /* no match because surrogate pair is split */
3506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    }
3516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
3526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                if(*(--p)!=*(--q)) {
3536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    break; /* no match */
3546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
3556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
3566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
3576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
3586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
3596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    /* not found */
3606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    return NULL;
3616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
3626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
3636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgU_CAPI UChar * U_EXPORT2
3646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgu_strrstr(const UChar *s, const UChar *substring) {
3656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    return u_strFindLast(s, -1, substring, -1);
3666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
3676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
3686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgU_CAPI UChar * U_EXPORT2
3696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgu_strrchr(const UChar *s, UChar c) {
3706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if(U16_IS_SURROGATE(c)) {
3716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        /* make sure to not find half of a surrogate pair */
3726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return u_strFindLast(s, -1, &c, 1);
3736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    } else {
3746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        const UChar *result=NULL;
3756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        UChar cs;
3766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
3776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        /* trivial search for a BMP code point */
3786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        for(;;) {
3796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if((cs=*s)==c) {
3806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                result=s;
3816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
3826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if(cs==0) {
3836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                return (UChar *)result;
3846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
3856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            ++s;
3866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
3876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
3886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
3896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
3906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgU_CAPI UChar * U_EXPORT2
3916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgu_strrchr32(const UChar *s, UChar32 c) {
3926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if((uint32_t)c<=U_BMP_MAX) {
3936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        /* find BMP code point */
3946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return u_strrchr(s, (UChar)c);
3956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    } else if((uint32_t)c<=UCHAR_MAX_VALUE) {
3966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        /* find supplementary code point as surrogate pair */
3976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        const UChar *result=NULL;
3986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        UChar cs, lead=U16_LEAD(c), trail=U16_TRAIL(c);
3996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
4006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        while((cs=*s++)!=0) {
4016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if(cs==lead && *s==trail) {
4026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                result=s-1;
4036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
4046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
4056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return (UChar *)result;
4066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    } else {
4076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        /* not a Unicode code point, not findable */
4086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return NULL;
4096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
4106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
4116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
4126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgU_CAPI UChar * U_EXPORT2
4136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgu_memrchr(const UChar *s, UChar c, int32_t count) {
4146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if(count<=0) {
4156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return NULL; /* no string */
4166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    } else if(U16_IS_SURROGATE(c)) {
4176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        /* make sure to not find half of a surrogate pair */
4186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return u_strFindLast(s, count, &c, 1);
4196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    } else {
4206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        /* trivial search for a BMP code point */
4216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        const UChar *limit=s+count;
4226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        do {
4236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if(*(--limit)==c) {
4246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                return (UChar *)limit;
4256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
4266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        } while(s!=limit);
4276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return NULL;
4286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
4296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
4306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
4316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgU_CAPI UChar * U_EXPORT2
4326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgu_memrchr32(const UChar *s, UChar32 c, int32_t count) {
4336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if((uint32_t)c<=U_BMP_MAX) {
4346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        /* find BMP code point */
4356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return u_memrchr(s, (UChar)c, count);
4366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    } else if(count<2) {
4376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        /* too short for a surrogate pair */
4386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return NULL;
4396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    } else if((uint32_t)c<=UCHAR_MAX_VALUE) {
4406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        /* find supplementary code point as surrogate pair */
4416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        const UChar *limit=s+count-1;
4426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        UChar lead=U16_LEAD(c), trail=U16_TRAIL(c);
4436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
4446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        do {
4456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if(*limit==trail && *(limit-1)==lead) {
4466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                return (UChar *)(limit-1);
4476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
4486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        } while(s!=--limit);
4496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return NULL;
4506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    } else {
4516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        /* not a Unicode code point, not findable */
4526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return NULL;
4536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
4546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
4556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
4566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/* Tokenization functions --------------------------------------------------- */
4576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
4586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/*
4596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Match each code point in a string against each code point in the matchSet.
4606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Return the index of the first string code point that
4616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * is (polarity==TRUE) or is not (FALSE) contained in the matchSet.
4626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Return -(string length)-1 if there is no such code point.
4636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */
4646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgstatic int32_t
4656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org_matchFromSet(const UChar *string, const UChar *matchSet, UBool polarity) {
4666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    int32_t matchLen, matchBMPLen, strItr, matchItr;
4676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UChar32 stringCh, matchCh;
4686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UChar c, c2;
4696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
4706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    /* first part of matchSet contains only BMP code points */
4716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    matchBMPLen = 0;
4726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    while((c = matchSet[matchBMPLen]) != 0 && U16_IS_SINGLE(c)) {
4736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        ++matchBMPLen;
4746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
4756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
4766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    /* second part of matchSet contains BMP and supplementary code points */
4776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    matchLen = matchBMPLen;
4786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    while(matchSet[matchLen] != 0) {
4796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        ++matchLen;
4806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
4816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
4826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    for(strItr = 0; (c = string[strItr]) != 0;) {
4836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        ++strItr;
4846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if(U16_IS_SINGLE(c)) {
4856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if(polarity) {
4866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                for(matchItr = 0; matchItr < matchLen; ++matchItr) {
4876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    if(c == matchSet[matchItr]) {
4886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        return strItr - 1; /* one matches */
4896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    }
4906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
4916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            } else {
4926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                for(matchItr = 0; matchItr < matchLen; ++matchItr) {
4936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    if(c == matchSet[matchItr]) {
4946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        goto endloop;
4956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    }
4966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
4976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                return strItr - 1; /* none matches */
4986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
4996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        } else {
5006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            /*
5016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org             * No need to check for string length before U16_IS_TRAIL
5026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org             * because c2 could at worst be the terminating NUL.
5036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org             */
5046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if(U16_IS_SURROGATE_LEAD(c) && U16_IS_TRAIL(c2 = string[strItr])) {
5056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                ++strItr;
5066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                stringCh = U16_GET_SUPPLEMENTARY(c, c2);
5076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            } else {
5086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                stringCh = c; /* unpaired trail surrogate */
5096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
5106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
5116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if(polarity) {
5126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                for(matchItr = matchBMPLen; matchItr < matchLen;) {
5136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    U16_NEXT(matchSet, matchItr, matchLen, matchCh);
5146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    if(stringCh == matchCh) {
5156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        return strItr - U16_LENGTH(stringCh); /* one matches */
5166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    }
5176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
5186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            } else {
5196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                for(matchItr = matchBMPLen; matchItr < matchLen;) {
5206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    U16_NEXT(matchSet, matchItr, matchLen, matchCh);
5216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    if(stringCh == matchCh) {
5226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        goto endloop;
5236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    }
5246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
5256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                return strItr - U16_LENGTH(stringCh); /* none matches */
5266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
5276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
5286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgendloop:
5296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        /* wish C had continue with labels like Java... */;
5306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
5316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
5326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    /* Didn't find it. */
5336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    return -strItr-1;
5346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
5356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
5366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/* Search for a codepoint in a string that matches one of the matchSet codepoints. */
5376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgU_CAPI UChar * U_EXPORT2
5386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgu_strpbrk(const UChar *string, const UChar *matchSet)
5396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org{
5406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    int32_t idx = _matchFromSet(string, matchSet, TRUE);
5416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if(idx >= 0) {
5426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return (UChar *)string + idx;
5436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    } else {
5446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return NULL;
5456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
5466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
5476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
5486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/* Search for a codepoint in a string that matches one of the matchSet codepoints. */
5496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgU_CAPI int32_t U_EXPORT2
5506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgu_strcspn(const UChar *string, const UChar *matchSet)
5516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org{
5526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    int32_t idx = _matchFromSet(string, matchSet, TRUE);
5536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if(idx >= 0) {
5546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return idx;
5556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    } else {
5566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return -idx - 1; /* == u_strlen(string) */
5576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
5586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
5596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
5606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/* Search for a codepoint in a string that does not match one of the matchSet codepoints. */
5616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgU_CAPI int32_t U_EXPORT2
5626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgu_strspn(const UChar *string, const UChar *matchSet)
5636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org{
5646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    int32_t idx = _matchFromSet(string, matchSet, FALSE);
5656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if(idx >= 0) {
5666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return idx;
5676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    } else {
5686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return -idx - 1; /* == u_strlen(string) */
5696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
5706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
5716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
5726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/* ----- Text manipulation functions --- */
5736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
5746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgU_CAPI UChar* U_EXPORT2
5756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgu_strtok_r(UChar    *src,
5766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     const UChar    *delim,
5776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org           UChar   **saveState)
5786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org{
5796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UChar *tokSource;
5806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UChar *nextToken;
5816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    uint32_t nonDelimIdx;
5826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
5836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    /* If saveState is NULL, the user messed up. */
5846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (src != NULL) {
5856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        tokSource = src;
5866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        *saveState = src; /* Set to "src" in case there are no delimiters */
5876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
5886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    else if (*saveState) {
5896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        tokSource = *saveState;
5906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
5916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    else {
5926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        /* src == NULL && *saveState == NULL */
5936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        /* This shouldn't happen. We already finished tokenizing. */
5946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return NULL;
5956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
5966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
5976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    /* Skip initial delimiters */
5986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    nonDelimIdx = u_strspn(tokSource, delim);
5996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    tokSource = &tokSource[nonDelimIdx];
6006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
6016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (*tokSource) {
6026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        nextToken = u_strpbrk(tokSource, delim);
6036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if (nextToken != NULL) {
6046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            /* Create a token */
6056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            *(nextToken++) = 0;
6066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            *saveState = nextToken;
6076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            return tokSource;
6086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
6096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        else if (*saveState) {
6106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            /* Return the last token */
6116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            *saveState = NULL;
6126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            return tokSource;
6136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
6146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
6156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    else {
6166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        /* No tokens were found. Only delimiters were left. */
6176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        *saveState = NULL;
6186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
6196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    return NULL;
6206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
6216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
6226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/* Miscellaneous functions -------------------------------------------------- */
6236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
6246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgU_CAPI UChar* U_EXPORT2
6256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgu_strcat(UChar     *dst,
6266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    const UChar     *src)
6276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org{
6286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UChar *anchor = dst;            /* save a pointer to start of dst */
6296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
6306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    while(*dst != 0) {              /* To end of first string          */
6316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        ++dst;
6326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
6336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    while((*(dst++) = *(src++)) != 0) {     /* copy string 2 over              */
6346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
6356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
6366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    return anchor;
6376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
6386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
6396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgU_CAPI UChar*  U_EXPORT2
6406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgu_strncat(UChar     *dst,
6416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     const UChar     *src,
6426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     int32_t     n )
6436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org{
6446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if(n > 0) {
6456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        UChar *anchor = dst;            /* save a pointer to start of dst */
6466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
6476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        while(*dst != 0) {              /* To end of first string          */
6486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            ++dst;
6496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
6506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        while((*dst = *src) != 0) {     /* copy string 2 over              */
6516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            ++dst;
6526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if(--n == 0) {
6536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                *dst = 0;
6546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                break;
6556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
6566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            ++src;
6576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
6586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
6596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return anchor;
6606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    } else {
6616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return dst;
6626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
6636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
6646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
6656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/* ----- Text property functions --- */
6666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
6676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgU_CAPI int32_t   U_EXPORT2
6686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgu_strcmp(const UChar *s1,
6696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    const UChar *s2)
6706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org{
6716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UChar  c1, c2;
6726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
6736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    for(;;) {
6746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        c1=*s1++;
6756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        c2=*s2++;
6766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if (c1 != c2 || c1 == 0) {
6776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            break;
6786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
6796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
6806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    return (int32_t)c1 - (int32_t)c2;
6816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
6826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
6836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgU_CFUNC int32_t U_EXPORT2
6846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orguprv_strCompare(const UChar *s1, int32_t length1,
6856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                const UChar *s2, int32_t length2,
6866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                UBool strncmpStyle, UBool codePointOrder) {
6876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    const UChar *start1, *start2, *limit1, *limit2;
6886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UChar c1, c2;
6896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
6906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    /* setup for fix-up */
6916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    start1=s1;
6926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    start2=s2;
6936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
6946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    /* compare identical prefixes - they do not need to be fixed up */
6956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if(length1<0 && length2<0) {
6966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        /* strcmp style, both NUL-terminated */
6976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if(s1==s2) {
6986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            return 0;
6996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
7006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
7016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        for(;;) {
7026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            c1=*s1;
7036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            c2=*s2;
7046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if(c1!=c2) {
7056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                break;
7066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
7076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if(c1==0) {
7086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                return 0;
7096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
7106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            ++s1;
7116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            ++s2;
7126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
7136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
7146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        /* setup for fix-up */
7156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        limit1=limit2=NULL;
7166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    } else if(strncmpStyle) {
7176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        /* special handling for strncmp, assume length1==length2>=0 but also check for NUL */
7186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if(s1==s2) {
7196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            return 0;
7206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
7216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
7226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        limit1=start1+length1;
7236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
7246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        for(;;) {
7256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            /* both lengths are same, check only one limit */
7266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if(s1==limit1) {
7276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                return 0;
7286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
7296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
7306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            c1=*s1;
7316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            c2=*s2;
7326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if(c1!=c2) {
7336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                break;
7346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
7356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if(c1==0) {
7366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                return 0;
7376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
7386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            ++s1;
7396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            ++s2;
7406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
7416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
7426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        /* setup for fix-up */
7436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        limit2=start2+length1; /* use length1 here, too, to enforce assumption */
7446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    } else {
7456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        /* memcmp/UnicodeString style, both length-specified */
7466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        int32_t lengthResult;
7476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
7486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if(length1<0) {
7496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            length1=u_strlen(s1);
7506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
7516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if(length2<0) {
7526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            length2=u_strlen(s2);
7536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
7546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
7556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        /* limit1=start1+min(lenght1, length2) */
7566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if(length1<length2) {
7576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            lengthResult=-1;
7586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            limit1=start1+length1;
7596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        } else if(length1==length2) {
7606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            lengthResult=0;
7616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            limit1=start1+length1;
7626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        } else /* length1>length2 */ {
7636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            lengthResult=1;
7646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            limit1=start1+length2;
7656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
7666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
7676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if(s1==s2) {
7686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            return lengthResult;
7696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
7706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
7716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        for(;;) {
7726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            /* check pseudo-limit */
7736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if(s1==limit1) {
7746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                return lengthResult;
7756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
7766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
7776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            c1=*s1;
7786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            c2=*s2;
7796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if(c1!=c2) {
7806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                break;
7816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
7826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            ++s1;
7836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            ++s2;
7846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
7856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
7866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        /* setup for fix-up */
7876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        limit1=start1+length1;
7886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        limit2=start2+length2;
7896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
7906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
7916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    /* if both values are in or above the surrogate range, fix them up */
7926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if(c1>=0xd800 && c2>=0xd800 && codePointOrder) {
7936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        /* subtract 0x2800 from BMP code points to make them smaller than supplementary ones */
7946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if(
7956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            (c1<=0xdbff && (s1+1)!=limit1 && U16_IS_TRAIL(*(s1+1))) ||
7966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            (U16_IS_TRAIL(c1) && start1!=s1 && U16_IS_LEAD(*(s1-1)))
7976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        ) {
7986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            /* part of a surrogate pair, leave >=d800 */
7996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        } else {
8006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            /* BMP code point - may be surrogate code point - make <d800 */
8016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            c1-=0x2800;
8026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
8036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
8046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if(
8056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            (c2<=0xdbff && (s2+1)!=limit2 && U16_IS_TRAIL(*(s2+1))) ||
8066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            (U16_IS_TRAIL(c2) && start2!=s2 && U16_IS_LEAD(*(s2-1)))
8076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        ) {
8086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            /* part of a surrogate pair, leave >=d800 */
8096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        } else {
8106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            /* BMP code point - may be surrogate code point - make <d800 */
8116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            c2-=0x2800;
8126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
8136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
8146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
8156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    /* now c1 and c2 are in the requested (code unit or code point) order */
8166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    return (int32_t)c1-(int32_t)c2;
8176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
8186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
8196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/*
8206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Compare two strings as presented by UCharIterators.
8216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Use code unit or code point order.
8226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * When the function returns, it is undefined where the iterators
8236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * have stopped.
8246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */
8256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgU_CAPI int32_t U_EXPORT2
8266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgu_strCompareIter(UCharIterator *iter1, UCharIterator *iter2, UBool codePointOrder) {
8276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UChar32 c1, c2;
8286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
8296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    /* argument checking */
8306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if(iter1==NULL || iter2==NULL) {
8316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return 0; /* bad arguments */
8326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
8336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if(iter1==iter2) {
8346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return 0; /* identical iterators */
8356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
8366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
8376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    /* reset iterators to start? */
8386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    iter1->move(iter1, 0, UITER_START);
8396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    iter2->move(iter2, 0, UITER_START);
8406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
8416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    /* compare identical prefixes - they do not need to be fixed up */
8426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    for(;;) {
8436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        c1=iter1->next(iter1);
8446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        c2=iter2->next(iter2);
8456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if(c1!=c2) {
8466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            break;
8476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
8486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if(c1==-1) {
8496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            return 0;
8506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
8516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
8526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
8536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    /* if both values are in or above the surrogate range, fix them up */
8546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if(c1>=0xd800 && c2>=0xd800 && codePointOrder) {
8556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        /* subtract 0x2800 from BMP code points to make them smaller than supplementary ones */
8566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if(
8576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            (c1<=0xdbff && U16_IS_TRAIL(iter1->current(iter1))) ||
8586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            (U16_IS_TRAIL(c1) && (iter1->previous(iter1), U16_IS_LEAD(iter1->previous(iter1))))
8596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        ) {
8606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            /* part of a surrogate pair, leave >=d800 */
8616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        } else {
8626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            /* BMP code point - may be surrogate code point - make <d800 */
8636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            c1-=0x2800;
8646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
8656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
8666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if(
8676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            (c2<=0xdbff && U16_IS_TRAIL(iter2->current(iter2))) ||
8686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            (U16_IS_TRAIL(c2) && (iter2->previous(iter2), U16_IS_LEAD(iter2->previous(iter2))))
8696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        ) {
8706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            /* part of a surrogate pair, leave >=d800 */
8716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        } else {
8726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            /* BMP code point - may be surrogate code point - make <d800 */
8736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            c2-=0x2800;
8746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
8756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
8766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
8776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    /* now c1 and c2 are in the requested (code unit or code point) order */
8786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    return (int32_t)c1-(int32_t)c2;
8796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
8806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
8816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#if 0
8826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/*
8836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * u_strCompareIter() does not leave the iterators _on_ the different units.
8846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * This is possible but would cost a few extra indirect function calls to back
8856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * up if the last unit (c1 or c2 respectively) was >=0.
8866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *
8876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Consistently leaving them _behind_ the different units is not an option
8886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * because the current "unit" is the end of the string if that is reached,
8896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * and in such a case the iterator does not move.
8906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * For example, when comparing "ab" with "abc", both iterators rest _on_ the end
8916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * of their strings. Calling previous() on each does not move them to where
8926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * the comparison fails.
8936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *
8946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * So the simplest semantics is to not define where the iterators end up.
8956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *
8966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * The following fragment is part of what would need to be done for backing up.
8976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */
8986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgvoid fragment {
8996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        /* iff a surrogate is part of a surrogate pair, leave >=d800 */
9006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if(c1<=0xdbff) {
9016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if(!U16_IS_TRAIL(iter1->current(iter1))) {
9026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                /* lead surrogate code point - make <d800 */
9036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                c1-=0x2800;
9046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
9056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        } else if(c1<=0xdfff) {
9066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            int32_t idx=iter1->getIndex(iter1, UITER_CURRENT);
9076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            iter1->previous(iter1); /* ==c1 */
9086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if(!U16_IS_LEAD(iter1->previous(iter1))) {
9096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                /* trail surrogate code point - make <d800 */
9106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                c1-=0x2800;
9116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
9126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            /* go back to behind where the difference is */
9136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            iter1->move(iter1, idx, UITER_ZERO);
9146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        } else /* 0xe000<=c1<=0xffff */ {
9156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            /* BMP code point - make <d800 */
9166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            c1-=0x2800;
9176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
9186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
9196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#endif
9206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
9216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgU_CAPI int32_t U_EXPORT2
9226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgu_strCompare(const UChar *s1, int32_t length1,
9236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org             const UChar *s2, int32_t length2,
9246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org             UBool codePointOrder) {
9256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    /* argument checking */
9266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if(s1==NULL || length1<-1 || s2==NULL || length2<-1) {
9276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return 0;
9286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
9296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    return uprv_strCompare(s1, length1, s2, length2, FALSE, codePointOrder);
9306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
9316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
9326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/* String compare in code point order - u_strcmp() compares in code unit order. */
9336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgU_CAPI int32_t U_EXPORT2
9346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgu_strcmpCodePointOrder(const UChar *s1, const UChar *s2) {
9356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    return uprv_strCompare(s1, -1, s2, -1, FALSE, TRUE);
9366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
9376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
9386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgU_CAPI int32_t   U_EXPORT2
9396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgu_strncmp(const UChar     *s1,
9406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     const UChar     *s2,
9416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     int32_t     n)
9426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org{
9436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if(n > 0) {
9446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        int32_t rc;
9456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        for(;;) {
9466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            rc = (int32_t)*s1 - (int32_t)*s2;
9476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if(rc != 0 || *s1 == 0 || --n == 0) {
9486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                return rc;
9496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
9506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            ++s1;
9516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            ++s2;
9526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
9536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    } else {
9546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return 0;
9556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
9566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
9576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
9586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgU_CAPI int32_t U_EXPORT2
9596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgu_strncmpCodePointOrder(const UChar *s1, const UChar *s2, int32_t n) {
9606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    return uprv_strCompare(s1, n, s2, n, TRUE, TRUE);
9616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
9626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
9636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgU_CAPI UChar* U_EXPORT2
9646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgu_strcpy(UChar     *dst,
9656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    const UChar     *src)
9666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org{
9676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UChar *anchor = dst;            /* save a pointer to start of dst */
9686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
9696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    while((*(dst++) = *(src++)) != 0) {     /* copy string 2 over              */
9706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
9716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
9726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    return anchor;
9736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
9746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
9756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgU_CAPI UChar*  U_EXPORT2
9766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgu_strncpy(UChar     *dst,
9776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     const UChar     *src,
9786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     int32_t     n)
9796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org{
9806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UChar *anchor = dst;            /* save a pointer to start of dst */
9816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
9826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    /* copy string 2 over */
9836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    while(n > 0 && (*(dst++) = *(src++)) != 0) {
9846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        --n;
9856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
9866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
9876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    return anchor;
9886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
9896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
9906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgU_CAPI int32_t   U_EXPORT2
9916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgu_strlen(const UChar *s)
9926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org{
9936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#if U_SIZEOF_WCHAR_T == U_SIZEOF_UCHAR
9946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    return (int32_t)uprv_wcslen(s);
9956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#else
9966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    const UChar *t = s;
9976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    while(*t != 0) {
9986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org      ++t;
9996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
10006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    return t - s;
10016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#endif
10026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
10036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
10046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgU_CAPI int32_t U_EXPORT2
10056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgu_countChar32(const UChar *s, int32_t length) {
10066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    int32_t count;
10076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
10086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if(s==NULL || length<-1) {
10096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return 0;
10106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
10116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
10126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    count=0;
10136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if(length>=0) {
10146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        while(length>0) {
10156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            ++count;
10166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if(U16_IS_LEAD(*s) && length>=2 && U16_IS_TRAIL(*(s+1))) {
10176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                s+=2;
10186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                length-=2;
10196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            } else {
10206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                ++s;
10216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                --length;
10226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
10236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
10246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    } else /* length==-1 */ {
10256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        UChar c;
10266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
10276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        for(;;) {
10286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if((c=*s++)==0) {
10296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                break;
10306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
10316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            ++count;
10326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
10336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            /*
10346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org             * sufficient to look ahead one because of UTF-16;
10356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org             * safe to look ahead one because at worst that would be the terminating NUL
10366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org             */
10376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if(U16_IS_LEAD(c) && U16_IS_TRAIL(*s)) {
10386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                ++s;
10396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
10406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
10416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
10426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    return count;
10436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
10446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
10456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgU_CAPI UBool U_EXPORT2
10466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgu_strHasMoreChar32Than(const UChar *s, int32_t length, int32_t number) {
10476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
10486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if(number<0) {
10496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return TRUE;
10506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
10516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if(s==NULL || length<-1) {
10526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return FALSE;
10536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
10546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
10556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if(length==-1) {
10566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        /* s is NUL-terminated */
10576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        UChar c;
10586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
10596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        /* count code points until they exceed */
10606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        for(;;) {
10616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if((c=*s++)==0) {
10626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                return FALSE;
10636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
10646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if(number==0) {
10656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                return TRUE;
10666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
10676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if(U16_IS_LEAD(c) && U16_IS_TRAIL(*s)) {
10686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                ++s;
10696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
10706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            --number;
10716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
10726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    } else {
10736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        /* length>=0 known */
10746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        const UChar *limit;
10756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        int32_t maxSupplementary;
10766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
10776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        /* s contains at least (length+1)/2 code points: <=2 UChars per cp */
10786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if(((length+1)/2)>number) {
10796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            return TRUE;
10806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
10816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
10826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        /* check if s does not even contain enough UChars */
10836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        maxSupplementary=length-number;
10846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if(maxSupplementary<=0) {
10856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            return FALSE;
10866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
10876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        /* there are maxSupplementary=length-number more UChars than asked-for code points */
10886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
10896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        /*
10906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org         * count code points until they exceed and also check that there are
10916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org         * no more than maxSupplementary supplementary code points (UChar pairs)
10926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org         */
10936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        limit=s+length;
10946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        for(;;) {
10956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if(s==limit) {
10966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                return FALSE;
10976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
10986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if(number==0) {
10996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                return TRUE;
11006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
11016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if(U16_IS_LEAD(*s++) && s!=limit && U16_IS_TRAIL(*s)) {
11026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                ++s;
11036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                if(--maxSupplementary<=0) {
11046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    /* too many pairs - too few code points */
11056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    return FALSE;
11066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
11076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
11086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            --number;
11096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
11106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
11116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
11126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
11136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgU_CAPI UChar * U_EXPORT2
11146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgu_memcpy(UChar *dest, const UChar *src, int32_t count) {
11156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if(count > 0) {
11166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        uprv_memcpy(dest, src, count*U_SIZEOF_UCHAR);
11176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
11186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    return dest;
11196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
11206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
11216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgU_CAPI UChar * U_EXPORT2
11226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgu_memmove(UChar *dest, const UChar *src, int32_t count) {
11236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if(count > 0) {
11246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        uprv_memmove(dest, src, count*U_SIZEOF_UCHAR);
11256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
11266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    return dest;
11276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
11286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
11296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgU_CAPI UChar * U_EXPORT2
11306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgu_memset(UChar *dest, UChar c, int32_t count) {
11316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if(count > 0) {
11326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        UChar *ptr = dest;
11336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        UChar *limit = dest + count;
11346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
11356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        while (ptr < limit) {
11366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            *(ptr++) = c;
11376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
11386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
11396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    return dest;
11406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
11416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
11426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgU_CAPI int32_t U_EXPORT2
11436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgu_memcmp(const UChar *buf1, const UChar *buf2, int32_t count) {
11446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if(count > 0) {
11456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        const UChar *limit = buf1 + count;
11466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        int32_t result;
11476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
11486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        while (buf1 < limit) {
11496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            result = (int32_t)(uint16_t)*buf1 - (int32_t)(uint16_t)*buf2;
11506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if (result != 0) {
11516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                return result;
11526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
11536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            buf1++;
11546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            buf2++;
11556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
11566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
11576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    return 0;
11586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
11596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
11606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgU_CAPI int32_t U_EXPORT2
11616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgu_memcmpCodePointOrder(const UChar *s1, const UChar *s2, int32_t count) {
11626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    return uprv_strCompare(s1, count, s2, count, FALSE, TRUE);
11636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
11646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
11656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/* u_unescape & support fns ------------------------------------------------- */
11666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
11676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/* This map must be in ASCENDING ORDER OF THE ESCAPE CODE */
11686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgstatic const UChar UNESCAPE_MAP[] = {
11696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    /*"   0x22, 0x22 */
11706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    /*'   0x27, 0x27 */
11716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    /*?   0x3F, 0x3F */
11726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    /*\   0x5C, 0x5C */
11736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    /*a*/ 0x61, 0x07,
11746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    /*b*/ 0x62, 0x08,
11756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    /*e*/ 0x65, 0x1b,
11766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    /*f*/ 0x66, 0x0c,
11776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    /*n*/ 0x6E, 0x0a,
11786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    /*r*/ 0x72, 0x0d,
11796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    /*t*/ 0x74, 0x09,
11806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    /*v*/ 0x76, 0x0b
11816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org};
11826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgenum { UNESCAPE_MAP_LENGTH = sizeof(UNESCAPE_MAP) / sizeof(UNESCAPE_MAP[0]) };
11836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
11846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/* Convert one octal digit to a numeric value 0..7, or -1 on failure */
11856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgstatic int8_t _digit8(UChar c) {
11866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (c >= 0x0030 && c <= 0x0037) {
11876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return (int8_t)(c - 0x0030);
11886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
11896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    return -1;
11906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
11916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
11926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/* Convert one hex digit to a numeric value 0..F, or -1 on failure */
11936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgstatic int8_t _digit16(UChar c) {
11946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (c >= 0x0030 && c <= 0x0039) {
11956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return (int8_t)(c - 0x0030);
11966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
11976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (c >= 0x0041 && c <= 0x0046) {
11986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return (int8_t)(c - (0x0041 - 10));
11996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
12006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (c >= 0x0061 && c <= 0x0066) {
12016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return (int8_t)(c - (0x0061 - 10));
12026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
12036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    return -1;
12046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
12056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
12066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/* Parse a single escape sequence.  Although this method deals in
12076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * UChars, it does not use C++ or UnicodeString.  This allows it to
12086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * be used from C contexts. */
12096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgU_CAPI UChar32 U_EXPORT2
12106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgu_unescapeAt(UNESCAPE_CHAR_AT charAt,
12116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org             int32_t *offset,
12126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org             int32_t length,
12136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org             void *context) {
12146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
12156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    int32_t start = *offset;
12166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UChar c;
12176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UChar32 result = 0;
12186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    int8_t n = 0;
12196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    int8_t minDig = 0;
12206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    int8_t maxDig = 0;
12216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    int8_t bitsPerDigit = 4;
12226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    int8_t dig;
12236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    int32_t i;
12246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UBool braces = FALSE;
12256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
12266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    /* Check that offset is in range */
12276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (*offset < 0 || *offset >= length) {
12286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        goto err;
12296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
12306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
12316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    /* Fetch first UChar after '\\' */
12326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    c = charAt((*offset)++, context);
12336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
12346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    /* Convert hexadecimal and octal escapes */
12356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    switch (c) {
12366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    case 0x0075 /*'u'*/:
12376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        minDig = maxDig = 4;
12386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        break;
12396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    case 0x0055 /*'U'*/:
12406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        minDig = maxDig = 8;
12416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        break;
12426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    case 0x0078 /*'x'*/:
12436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        minDig = 1;
12446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if (*offset < length && charAt(*offset, context) == 0x7B /*{*/) {
12456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            ++(*offset);
12466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            braces = TRUE;
12476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            maxDig = 8;
12486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        } else {
12496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            maxDig = 2;
12506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
12516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        break;
12526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    default:
12536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        dig = _digit8(c);
12546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if (dig >= 0) {
12556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            minDig = 1;
12566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            maxDig = 3;
12576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            n = 1; /* Already have first octal digit */
12586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            bitsPerDigit = 3;
12596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            result = dig;
12606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
12616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        break;
12626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
12636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (minDig != 0) {
12646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        while (*offset < length && n < maxDig) {
12656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            c = charAt(*offset, context);
12666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            dig = (int8_t)((bitsPerDigit == 3) ? _digit8(c) : _digit16(c));
12676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if (dig < 0) {
12686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                break;
12696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
12706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            result = (result << bitsPerDigit) | dig;
12716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            ++(*offset);
12726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            ++n;
12736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
12746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if (n < minDig) {
12756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            goto err;
12766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
12776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if (braces) {
12786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if (c != 0x7D /*}*/) {
12796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                goto err;
12806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
12816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            ++(*offset);
12826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
12836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if (result < 0 || result >= 0x110000) {
12846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            goto err;
12856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
12866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        /* If an escape sequence specifies a lead surrogate, see if
12876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org         * there is a trail surrogate after it, either as an escape or
12886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org         * as a literal.  If so, join them up into a supplementary.
12896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org         */
12906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if (*offset < length && U16_IS_LEAD(result)) {
12916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            int32_t ahead = *offset + 1;
12926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            c = charAt(*offset, context);
12936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if (c == 0x5C /*'\\'*/ && ahead < length) {
12946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                c = (UChar) u_unescapeAt(charAt, &ahead, length, context);
12956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
12966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if (U16_IS_TRAIL(c)) {
12976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                *offset = ahead;
12986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                result = U16_GET_SUPPLEMENTARY(result, c);
12996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
13006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
13016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return result;
13026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
13036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
13046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    /* Convert C-style escapes in table */
13056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    for (i=0; i<UNESCAPE_MAP_LENGTH; i+=2) {
13066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if (c == UNESCAPE_MAP[i]) {
13076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            return UNESCAPE_MAP[i+1];
13086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        } else if (c < UNESCAPE_MAP[i]) {
13096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            break;
13106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
13116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
13126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
13136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    /* Map \cX to control-X: X & 0x1F */
13146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (c == 0x0063 /*'c'*/ && *offset < length) {
13156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        c = charAt((*offset)++, context);
13166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if (U16_IS_LEAD(c) && *offset < length) {
13176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            UChar c2 = charAt(*offset, context);
13186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if (U16_IS_TRAIL(c2)) {
13196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                ++(*offset);
13206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                c = (UChar) U16_GET_SUPPLEMENTARY(c, c2); /* [sic] */
13216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
13226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
13236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return 0x1F & c;
13246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
13256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
13266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    /* If no special forms are recognized, then consider
13276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * the backslash to generically escape the next character.
13286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * Deal with surrogate pairs. */
13296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (U16_IS_LEAD(c) && *offset < length) {
13306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        UChar c2 = charAt(*offset, context);
13316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if (U16_IS_TRAIL(c2)) {
13326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            ++(*offset);
13336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            return U16_GET_SUPPLEMENTARY(c, c2);
13346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
13356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
13366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    return c;
13376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
13386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org err:
13396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    /* Invalid escape sequence */
13406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    *offset = start; /* Reset to initial value */
13416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    return (UChar32)0xFFFFFFFF;
13426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
13436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
13446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/* u_unescapeAt() callback to return a UChar from a char* */
13456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgstatic UChar U_CALLCONV
13466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org_charPtr_charAt(int32_t offset, void *context) {
13476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UChar c16;
13486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    /* It would be more efficient to access the invariant tables
13496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * directly but there is no API for that. */
13506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    u_charsToUChars(((char*) context) + offset, &c16, 1);
13516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    return c16;
13526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
13536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
13546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/* Append an escape-free segment of the text; used by u_unescape() */
13556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgstatic void _appendUChars(UChar *dest, int32_t destCapacity,
13566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                          const char *src, int32_t srcLen) {
13576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (destCapacity < 0) {
13586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        destCapacity = 0;
13596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
13606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (srcLen > destCapacity) {
13616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        srcLen = destCapacity;
13626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
13636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    u_charsToUChars(src, dest, srcLen);
13646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
13656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
13666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/* Do an invariant conversion of char* -> UChar*, with escape parsing */
13676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgU_CAPI int32_t U_EXPORT2
13686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgu_unescape(const char *src, UChar *dest, int32_t destCapacity) {
13696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    const char *segment = src;
13706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    int32_t i = 0;
13716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    char c;
13726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
13736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    while ((c=*src) != 0) {
13746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        /* '\\' intentionally written as compiler-specific
13756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org         * character constant to correspond to compiler-specific
13766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org         * char* constants. */
13776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if (c == '\\') {
13786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            int32_t lenParsed = 0;
13796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            UChar32 c32;
13806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if (src != segment) {
13816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                if (dest != NULL) {
13826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    _appendUChars(dest + i, destCapacity - i,
13836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                                  segment, (int32_t)(src - segment));
13846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
13856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                i += (int32_t)(src - segment);
13866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
13876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            ++src; /* advance past '\\' */
13886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            c32 = (UChar32)u_unescapeAt(_charPtr_charAt, &lenParsed, (int32_t)uprv_strlen(src), (void*)src);
13896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if (lenParsed == 0) {
13906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                goto err;
13916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
13926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            src += lenParsed; /* advance past escape seq. */
13936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if (dest != NULL && U16_LENGTH(c32) <= (destCapacity - i)) {
13946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                U16_APPEND_UNSAFE(dest, i, c32);
13956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            } else {
13966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                i += U16_LENGTH(c32);
13976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
13986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            segment = src;
13996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        } else {
14006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            ++src;
14016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
14026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
14036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (src != segment) {
14046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if (dest != NULL) {
14056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            _appendUChars(dest + i, destCapacity - i,
14066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                          segment, (int32_t)(src - segment));
14076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
14086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        i += (int32_t)(src - segment);
14096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
14106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (dest != NULL && i < destCapacity) {
14116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        dest[i] = 0;
14126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
14136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    return i;
14146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
14156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org err:
14166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (dest != NULL && destCapacity > 0) {
14176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        *dest = 0;
14186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
14196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    return 0;
14206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
14216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
14226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/* NUL-termination of strings ----------------------------------------------- */
14236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
/**
 * NUL-terminate a string no matter what its type.
 * Set warning and error codes accordingly.
 *
 * Does nothing if pErrorCode is NULL or already holds a failure code.
 * Otherwise:
 *   length<0              nothing is done
 *   length<destCapacity   dest[length]=0; a pending
 *                         U_STRING_NOT_TERMINATED_WARNING is cleared
 *   length==destCapacity  *pErrorCode=U_STRING_NOT_TERMINATED_WARNING
 *   length>destCapacity   *pErrorCode=U_BUFFER_OVERFLOW_ERROR
 *
 * The expansion is a single do { } while(0) statement so that it is safe
 * inside an unbraced if/else; invoke it with a trailing semicolon.
 * The arguments are evaluated more than once.
 */
#define __TERMINATE_STRING(dest, destCapacity, length, pErrorCode)      \
    do {                                                                \
        if((pErrorCode)!=NULL && U_SUCCESS(*(pErrorCode))) {            \
            /* not a public function, so no complete argument checking */ \
                                                                        \
            if((length)<0) {                                            \
                /* assume that the caller handles this */               \
            } else if((length)<(destCapacity)) {                        \
                /* NUL-terminate the string, the NUL fits */            \
                (dest)[(length)]=0;                                     \
                /* unset the not-terminated warning but leave all others */ \
                if(*(pErrorCode)==U_STRING_NOT_TERMINATED_WARNING) {    \
                    *(pErrorCode)=U_ZERO_ERROR;                         \
                }                                                       \
            } else if((length)==(destCapacity)) {                       \
                /* unable to NUL-terminate, but the string itself fit - set a warning code */ \
                *(pErrorCode)=U_STRING_NOT_TERMINATED_WARNING;          \
            } else /* length>destCapacity */ {                          \
                /* even the string itself did not fit - set an error code */ \
                *(pErrorCode)=U_BUFFER_OVERFLOW_ERROR;                  \
            }                                                           \
        }                                                               \
    } while(0)
14496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
14506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgU_CAPI int32_t U_EXPORT2
14516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgu_terminateUChars(UChar *dest, int32_t destCapacity, int32_t length, UErrorCode *pErrorCode) {
14526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    __TERMINATE_STRING(dest, destCapacity, length, pErrorCode);
14536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    return length;
14546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
14556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
14566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgU_CAPI int32_t U_EXPORT2
14576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgu_terminateChars(char *dest, int32_t destCapacity, int32_t length, UErrorCode *pErrorCode) {
14586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    __TERMINATE_STRING(dest, destCapacity, length, pErrorCode);
14596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    return length;
14606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
14616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
14626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgU_CAPI int32_t U_EXPORT2
14636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgu_terminateUChar32s(UChar32 *dest, int32_t destCapacity, int32_t length, UErrorCode *pErrorCode) {
14646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    __TERMINATE_STRING(dest, destCapacity, length, pErrorCode);
14656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    return length;
14666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
14676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
14686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgU_CAPI int32_t U_EXPORT2
14696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgu_terminateWChars(wchar_t *dest, int32_t destCapacity, int32_t length, UErrorCode *pErrorCode) {
14706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    __TERMINATE_STRING(dest, destCapacity, length, pErrorCode);
14716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    return length;
14726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
14736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
14746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// Compute the hash code for a string -------------------------------------- ***
14756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
14766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// Moved here from uhash.c so that UnicodeString::hashCode() does not depend
14776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// on UHashtable code.
14786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
14796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/*
14806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  Compute the hash by iterating sparsely over about 32 (up to 63)
14816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  characters spaced evenly through the string.  For each character,
14826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  multiply the previous hash value by a prime number and add the new
14836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  character in, like a linear congruential random number generator,
14846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  producing a pseudorandom deterministic value well distributed over
14856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  the output range. [LIU]
14866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org*/
14876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
/*
 * STRING_HASH(TYPE, STR, STRLEN, DEREF)
 *   TYPE   - element type that the cursor `p` steps over
 *   STR    - start of the string; a NULL pointer hashes to 0
 *   STRLEN - number of TYPE elements (converted to int32_t)
 *   DEREF  - expression yielding the value of the element at `p`
 *
 * Expands to a complete function body that ends in `return`, so it has to be
 * the only statement of a function returning int32_t.
 *
 * The accumulator is unsigned on purpose: `hash * 37` exceeds 32 bits after a
 * handful of elements, and signed overflow is undefined behavior whereas
 * unsigned arithmetic wraps modulo 2^32.  The value is converted to int32_t
 * only when it is returned.
 */
#define STRING_HASH(TYPE, STR, STRLEN, DEREF)                             \
    uint32_t hash = 0;                                                    \
    const TYPE *p = (const TYPE *)(STR);                                  \
    if (p != NULL) {                                                      \
        int32_t len = (int32_t)(STRLEN);                                  \
        /* stride: 1 for lengths 1..63, 2 for 64..95, 3 for 96..127 ... */ \
        int32_t inc = ((len - 32) / 32) + 1;                              \
        const TYPE *limit = p + len;                                      \
        while (p < limit) {                                               \
            hash = (hash * 37u) + (uint32_t)(DEREF);                      \
            p += inc;                                                     \
        }                                                                 \
    }                                                                     \
    return (int32_t)hash
15016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
15026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/* Used by UnicodeString to compute its hashcode - Not public API. */
15036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgU_CAPI int32_t U_EXPORT2
15046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgustr_hashUCharsN(const UChar *str, int32_t length) {
15056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    STRING_HASH(UChar, str, length, *p);
15066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
15076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
15086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgU_CAPI int32_t U_EXPORT2
15096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgustr_hashCharsN(const char *str, int32_t length) {
15106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    STRING_HASH(uint8_t, str, length, *p);
15116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
15126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
15136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgU_CAPI int32_t U_EXPORT2
15146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgustr_hashICharsN(const char *str, int32_t length) {
15156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    STRING_HASH(char, str, length, (uint8_t)uprv_tolower(*p));
15166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
1517