1/* 2 ******************************************************************************* 3 * Copyright (C) 2002-2010, International Business Machines 4 * Corporation and others. All Rights Reserved. 5 ******************************************************************************* 6*/ 7 8package com.ibm.icu.impl; 9/** 10 * @version 1.1 11 * @author Markus W. Scherer 12 * Ram: Add documentation, remove unwanted methods, improve coverage. 13 */ 14 15/** 16 * Simple class for handling serialized USet/UnicodeSet structures 17 * without object creation. See ICU4C icu/source/common/uset.c. 18 * 19 * @internal 20 */ 21public final class USerializedSet { 22 /** 23 * Fill in the given serialized set object. 24 * @param src pointer to start of array 25 * @param srcStart pointer to start of serialized data (length value) 26 * @return true if the given array is valid, otherwise false 27 */ 28 public final boolean getSet(char src[], int srcStart) { 29 // leave most argument checking up to Java exceptions 30 array=null; 31 arrayOffset=bmpLength=length=0; 32 33 length=src[srcStart++]; 34 35 36 if((length&0x8000) >0) { 37 /* there are supplementary values */ 38 length&=0x7fff; 39 if(src.length<(srcStart+1+length)) { 40 length=0; 41 throw new IndexOutOfBoundsException(); 42 } 43 bmpLength=src[srcStart++]; 44 } else { 45 /* only BMP values */ 46 if(src.length<(srcStart+length)) { 47 length=0; 48 throw new IndexOutOfBoundsException(); 49 } 50 bmpLength=length; 51 } 52 array = new char[length]; 53 System.arraycopy(src,srcStart,array,0,length); 54 //arrayOffset=srcStart; 55 return true; 56 } 57 58 /** 59 * Set the USerializedSet to contain the given character (and nothing 60 * else). 61 */ 62 public final void setToOne(int c) { 63 if( 0x10ffff<c) { 64 return; 65 } 66 67 if(c<0xffff) { 68 bmpLength=length=2; 69 array[0]=(char)c; 70 array[1]=(char)(c+1); 71 } else if(c==0xffff) { 72 bmpLength=1; 73 length=3; 74 array[0]=0xffff; 75 array[1]=1; 76 array[2]=0; 77 } else if(c<0x10ffff) { 78 bmpLength=0; 79 length=4; 80 array[0]=(char)(c>>16); 81 array[1]=(char)c; 82 ++c; 83 array[2]=(char)(c>>16); 84 array[3]=(char)c; 85 } else /* c==0x10ffff */ { 86 bmpLength=0; 87 length=2; 88 array[0]=0x10; 89 array[1]=0xffff; 90 } 91 } 92 93 /** 94 * Returns a range of characters contained in the given serialized 95 * set. 96 * @param rangeIndex a non-negative integer in the range <code>0.. 97 * getSerializedRangeCount()-1</code> 98 * @param range variable to receive the data in the range 99 * @return true if rangeIndex is valid, otherwise false 100 */ 101 public final boolean getRange(int rangeIndex, int[] range) { 102 if( rangeIndex<0) { 103 return false; 104 } 105 if(array==null){ 106 array = new char[8]; 107 } 108 if(range==null || range.length <2){ 109 throw new IllegalArgumentException(); 110 } 111 rangeIndex*=2; /* address start/limit pairs */ 112 if(rangeIndex<bmpLength) { 113 range[0]=array[rangeIndex++]; 114 if(rangeIndex<bmpLength) { 115 range[1]=array[rangeIndex]-1; 116 } else if(rangeIndex<length) { 117 range[1]=((((int)array[rangeIndex])<<16)|array[rangeIndex+1])-1; 118 } else { 119 range[1]=0x10ffff; 120 } 121 return true; 122 } else { 123 rangeIndex-=bmpLength; 124 rangeIndex*=2; /* address pairs of pairs of units */ 125 int suppLength=length-bmpLength; 126 if(rangeIndex<suppLength) { 127 int offset=arrayOffset+bmpLength; 128 range[0]=(((int)array[offset+rangeIndex])<<16)|array[offset+rangeIndex+1]; 129 rangeIndex+=2; 130 if(rangeIndex<suppLength) { 131 range[1]=((((int)array[offset+rangeIndex])<<16)|array[offset+rangeIndex+1])-1; 132 } else { 133 range[1]=0x10ffff; 134 } 135 return true; 136 } else { 137 return false; 138 } 139 } 140 } 141 142 /** 143 * Returns true if the given USerializedSet contains the given 144 * character. 145 * @param c the character to test for 146 * @return true if set contains c 147 */ 148 public final boolean contains(int c) { 149 150 if(c>0x10ffff) { 151 return false; 152 } 153 154 if(c<=0xffff) { 155 int i; 156 /* find c in the BMP part */ 157 for(i=0; i<bmpLength && (char)c>=array[i]; ++i) {} 158 return ((i&1) != 0); 159 } else { 160 int i; 161 /* find c in the supplementary part */ 162 char high=(char)(c>>16), low=(char)c; 163 for(i=bmpLength; 164 i<length && (high>array[i] || (high==array[i] && low>=array[i+1])); 165 i+=2) {} 166 167 /* count pairs of 16-bit units even per BMP and check if the number of pairs is odd */ 168 return (((i+bmpLength)&2)!=0); 169 } 170 } 171 172 /** 173 * Returns the number of disjoint ranges of characters contained in 174 * the given serialized set. Ignores any strings contained in the 175 * set. 176 * @return a non-negative integer counting the character ranges 177 * contained in set 178 */ 179 public final int countRanges() { 180 return (bmpLength+(length-bmpLength)/2+1)/2; 181 } 182 183 private char array[] = new char[8]; 184 private int arrayOffset, bmpLength, length; 185} 186