StringEncoder.java revision 417deb1db112103aff04231b6ca79772ff7d3a21
/**
 * String encoder/decoder for SQLite.
 *
 * This module was kindly donated by Eric van der Maarel of Nedap N.V.
 *
 * This encoder was implemented based on an original idea from an anonymous
 * author in the source code of the SQLite distribution.
 * I feel obliged to provide a quote from the original C-source code:
 *
 * "The author disclaims copyright to this source code.  In place of
 *  a legal notice, here is a blessing:
 *
 *     May you do good and not evil.
 *     May you find forgiveness for yourself and forgive others.
 *     May you share freely, never taking more than you give."
 *
 */

public class StringEncoder {

    /** Reserved character that introduces a two-character escape sequence. */
    private static final char ESCAPE = 1;

    /**
     * Encodes the given byte array into a string that can be used by
     * the SQLite database. The database cannot handle null (0x00) and
     * the character '\'' (0x27). The encoding consists of escaping
     * these characters with a reserved character (0x01). The escaping
     * is applied after determining and applying a shift that minimizes
     * the number of escapes required.
     * With this encoding the data of original size n is increased to a
     * maximum of 1+(n*257)/254.
     * For sufficiently large n the overhead is thus less than 1.2%.
     * <p>
     * Layout of the result: the first character is the shift value, each
     * following character is <code>(byte - shift) &amp; 0xff</code>, where the
     * values 0, 1 and '\'' are written as the escape character followed by
     * 1, 2 and 3 respectively.
     *
     * @param a the byte array to be encoded. A null reference is handled as
     *     an empty array.
     * @return the encoded bytes as a string. When an empty array is
     *     provided a string of length 1 is returned, the value of
     *     which is bogus.
     *     When decoded with this class' <code>decode</code> method
     *     a string of size 1 will return an empty byte array.
     */
    public static String encode(byte[] a) {
        // check input
        if (a == null || a.length == 0) {
            // bogus shift, no data
            return "x";
        }
        // histogram of the unsigned byte values
        int[] cnt = new int[256];
        for (int i = 0; i < a.length; i++) {
            cnt[a[i] & 0xff]++;
        }
        // determine shift for minimum number of escapes
        int shift = 1;
        int nEscapes = a.length;
        for (int i = 1; i < 256; i++) {
            if (i == '\'') {
                // the shift itself is emitted unescaped as the first
                // character, so it must not be the quote character
                continue;
            }
            // with shift i, the bytes i, i+1 and i+'\'' map onto the three
            // values (0, 1, '\'') that have to be escaped
            int sum = cnt[i] + cnt[(i + 1) & 0xff] + cnt[(i + '\'') & 0xff];
            if (sum < nEscapes) {
                nEscapes = sum;
                shift = i;
                if (nEscapes == 0) {
                    // cannot become smaller
                    break;
                }
            }
        }
        // construct encoded output: shift + data + one extra char per escape
        int outLen = a.length + nEscapes + 1;
        StringBuffer out = new StringBuffer(outLen);
        out.append((char) shift);
        for (int i = 0; i < a.length; i++) {
            // apply shift
            char c = (char) ((a[i] - shift) & 0xff);
            // insert escapes
            if (c == 0) { // forbidden
                out.append(ESCAPE);
                out.append((char) 1);
            } else if (c == ESCAPE) { // escape character
                out.append(ESCAPE);
                out.append((char) 2);
            } else if (c == '\'') { // forbidden
                out.append(ESCAPE);
                out.append((char) 3);
            } else {
                out.append(c);
            }
        }
        return out.toString();
    }

    /**
     * Decodes the given string that is assumed to be a valid encoding
     * of a byte array. Typically the given string is generated by
     * this class' <code>encode</code> or <code>encodeX</code> method.
     * <p>
     * A string of the form <code>X'..'</code> is treated as SQLite3 BLOB
     * notation and must contain an even number of hexadecimal digits
     * (upper or lower case). Any other string is treated as the
     * shift/escape encoding produced by <code>encode</code>.
     *
     * @param s the given string encoding.
     * @return the byte array obtained from the decoding.
     * @throws IllegalArgumentException when the string given is not
     *     a valid encoded string for this encoder (including a null or
     *     empty string, a truncated escape sequence, or malformed
     *     hexadecimal BLOB notation).
     */
    public static byte[] decode(String s) {
        if (s == null || s.length() == 0) {
            // even an empty array encodes to at least the shift character
            throw new IllegalArgumentException(
                "invalid string passed to decoder: empty");
        }
        char[] a = s.toCharArray();
        if (a.length > 2 && a[0] == 'X'
            && a[1] == '\'' && a[a.length - 1] == '\'') {
            // SQLite3 BLOB syntax
            return decodeX(a);
        }
        // first element is the shift
        int shift = a[0];
        // upper bound; escapes make the real length smaller
        byte[] result = new byte[a.length - 1];
        int i = 1;
        int j = 0;
        while (i < a.length) {
            int c = a[i++];
            if (c == ESCAPE) { // escape character found
                if (i >= a.length) {
                    // escape character without its code
                    throw new IllegalArgumentException(
                        "invalid string passed to decoder: " + j);
                }
                c = a[i++];
                if (c == 1) {
                    c = 0;
                } else if (c == 2) {
                    c = 1;
                } else if (c == 3) {
                    c = '\'';
                } else {
                    throw new IllegalArgumentException(
                        "invalid string passed to decoder: " + j);
                }
            }
            // undo shift
            result[j++] = (byte) ((c + shift) & 0xff);
        }
        int outLen = j;
        // provide array of correct length
        if (result.length != outLen) {
            result = byteCopy(result, 0, outLen, new byte[outLen]);
        }
        return result;
    }

    /**
     * Decodes SQLite3 BLOB notation. The caller has already verified that
     * the array starts with <code>X'</code> and ends with <code>'</code>.
     *
     * @param a the characters of the complete literal, including the
     *     <code>X'</code> prefix and the closing quote.
     * @return the decoded bytes.
     * @throws IllegalArgumentException when the number of digits is odd or
     *     a character between the quotes is not a hexadecimal digit.
     */
    private static byte[] decodeX(char[] a) {
        // everything except the leading X' and the trailing '
        int nDigits = a.length - 3;
        if ((nDigits & 1) != 0) {
            throw new IllegalArgumentException(
                "invalid string passed to decoder: odd number of hex digits");
        }
        byte[] result = new byte[nDigits / 2];
        for (int i = 2, k = 0; k < result.length; i += 2, k++) {
            int hi = hexValue(a[i]);
            int lo = hexValue(a[i + 1]);
            if (hi < 0 || lo < 0) {
                throw new IllegalArgumentException(
                    "invalid string passed to decoder: bad hex digit at " + i);
            }
            result[k] = (byte) ((hi << 4) | lo);
        }
        return result;
    }

    /**
     * Returns the numeric value of an ASCII hexadecimal digit.
     *
     * @param c the character to convert.
     * @return a value in the range 0..15, or -1 when c is not one of
     *     0-9, A-F or a-f.
     */
    private static int hexValue(char c) {
        if (c >= '0' && c <= '9') {
            return c - '0';
        }
        if (c >= 'A' && c <= 'F') {
            return c - 'A' + 10;
        }
        if (c >= 'a' && c <= 'f') {
            return c - 'a' + 10;
        }
        return -1;
    }

    /**
     * Copies count elements from source, starting at element with
     * index offset, to the given target. The elements are stored in the
     * target starting at index 0.
     *
     * @param source the source.
     * @param offset the offset.
     * @param count the number of elements to be copied.
     * @param target the target to be returned.
     * @return the target being copied to.
     */
    private static byte[] byteCopy(byte[] source, int offset,
                                   int count, byte[] target) {
        System.arraycopy(source, offset, target, 0, count);
        return target;
    }

    /** Upper-case hexadecimal digits, indexed by nibble value (0..15). */
    static final char[] xdigits = {
        '0', '1', '2', '3', '4', '5', '6', '7',
        '8', '9', 'A', 'B', 'C', 'D', 'E', 'F'
    };

    /**
     * Encodes the given byte array into SQLite3 blob notation, ie X'..'
     * Every byte is written as two upper-case hexadecimal digits.
     *
     * @param a the byte array to be encoded. A null reference is handled as
     *     an empty array.
     * @return the encoded bytes as a string.
     */
    public static String encodeX(byte[] a) {
        // check input
        if (a == null || a.length == 0) {
            return "X''";
        }
        // two digits per byte plus the X and both quotes
        int outLen = 2 * a.length + 3;
        StringBuffer out = new StringBuffer(outLen);
        out.append('X');
        out.append('\'');
        for (int i = 0; i < a.length; i++) {
            // mask after shifting: bytes are signed, so a plain >> 4 of a
            // value >= 0x80 would yield a negative table index
            out.append(xdigits[(a[i] >> 4) & 0x0F]);
            out.append(xdigits[a[i] & 0x0F]);
        }
        out.append('\'');
        return out.toString();
    }
}