StringEncoder.java revision 417deb1db112103aff04231b6ca79772ff7d3a21
1package SQLite;
2
3/**
4 * String encoder/decoder for SQLite.
5 *
6 * This module was kindly donated by Eric van der Maarel of Nedap N.V.
7 *
8 * This encoder was implemented based on an original idea from an anonymous
9 * author in the source code of the SQLite distribution.
10 * I feel obliged to provide a quote from the original C-source code:
11 *
12 * "The author disclaims copyright to this source code.  In place of
13 *  a legal notice, here is a blessing:
14 *
15 *     May you do good and not evil.
16 *     May you find forgiveness for yourself and forgive others.
17 *     May you share freely, never taking more than you give."
18 *
19 */
20
public class StringEncoder {

    /** Reserved character that introduces a two-character escape sequence. */
    private static final char ESCAPE = 1;

    /** Second character of the escape sequence standing for the value 0x00. */
    private static final char ESCAPED_NUL = 1;

    /** Second character of the escape sequence standing for the value 0x01. */
    private static final char ESCAPED_ESCAPE = 2;

    /** Second character of the escape sequence standing for the value 0x27. */
    private static final char ESCAPED_QUOTE = 3;

    /**
     * Encodes the given byte array into a string that can be used by
     * the SQLite database. The database cannot handle null (0x00) and
     * the character '\'' (0x27). The encoding consists of escaping
     * these characters with a reserved character (0x01). The escaping
     * is applied after determining and applying a shift that minimizes
     * the number of escapes required.
     * With this encoding the data of original size n is increased to a
     * maximum of 1+(n*257)/254.
     * For sufficiently large n the overhead is thus less than 1.2%.
     * @param a the byte array to be encoded. A null reference is handled as
     *     an empty array.
     * @return the encoded bytes as a string. The first character is the
     *     shift that was applied. When an empty array is provided a
     *     string of length 1 is returned, the value of which is bogus.
     *     When decoded with this class' <code>decode</code> method
     *     a string of size 1 will return an empty byte array.
     */
    public static String encode(byte[] a) {
        // check input
        if (a == null || a.length == 0) {
            // bogus shift, no data
            return "x";
        }
        // histogram of the unsigned byte values
        int[] cnt = new int[256];
        for (int i = 0; i < a.length; i++) {
            cnt[a[i] & 0xff]++;
        }
        // determine shift for minimum number of escapes; the shift itself
        // is emitted as a character, so it may be neither 0 nor '\''
        int shift = 1;
        int nEscapes = a.length;
        for (int i = 1; i < 256; i++) {
            if (i == '\'') {
                continue;
            }
            // input bytes that land on 0, 1 or '\'' after subtracting i
            int sum = cnt[i] + cnt[(i + 1) & 0xff] + cnt[(i + '\'') & 0xff];
            if (sum < nEscapes) {
                nEscapes = sum;
                shift = i;
                if (nEscapes == 0) {
                    // cannot become smaller
                    break;
                }
            }
        }
        // construct encoded output: shift character, then shifted data
        StringBuffer out = new StringBuffer(a.length + nEscapes + 1);
        out.append((char) shift);
        for (int i = 0; i < a.length; i++) {
            // apply shift
            char c = (char) ((a[i] - shift) & 0xff);
            // insert escapes
            if (c == 0) { // forbidden
                out.append(ESCAPE);
                out.append(ESCAPED_NUL);
            } else if (c == ESCAPE) { // escape character itself
                out.append(ESCAPE);
                out.append(ESCAPED_ESCAPE);
            } else if (c == '\'') { // forbidden
                out.append(ESCAPE);
                out.append(ESCAPED_QUOTE);
            } else {
                out.append(c);
            }
        }
        return out.toString();
    }

    /**
     * Decodes the given string that is assumed to be a valid encoding
     * of a byte array. Typically the given string is generated by
     * this class' <code>encode</code> or <code>encodeX</code> method.
     * A string of the form X'..' is decoded as SQLite3 blob notation
     * (upper and lower case hex digits are accepted); anything else is
     * decoded as a shift/escape encoding whose first character is the
     * shift.
     * @param s the given string encoding. Must not be null.
     * @return the byte array obtained from the decoding.
     * @throws IllegalArgumentException when the string given is not
     *    a valid encoded string for this encoder: it is empty, it
     *    contains an unknown or truncated escape sequence, or it is
     *    in X'..' notation with an odd number of digits or a
     *    character that is not a hex digit.
     * @throws NullPointerException when <code>s</code> is null.
     */
    public static byte[] decode(String s) {
        int len = s.length();
        if (len == 0) {
            // even an empty array is encoded with a shift character
            throw new IllegalArgumentException(
                "invalid string passed to decoder: empty string");
        }
        // The shift/escape encoding never contains '\'', so this
        // pattern unambiguously identifies blob notation.
        if (len > 2 && s.charAt(0) == 'X'
            && s.charAt(1) == '\'' && s.charAt(len - 1) == '\'') {
            return decodeX(s);
        }
        // first element is the shift
        int shift = s.charAt(0);
        // upper bound; every escape sequence shrinks the output by one
        byte[] result = new byte[len - 1];
        int i = 1;
        int j = 0;
        while (i < len) {
            int c = s.charAt(i++);
            if (c == ESCAPE) { // escape character found
                if (i >= len) {
                    // escape character without its second half
                    throw new IllegalArgumentException(
                        "invalid string passed to decoder: " + j);
                }
                c = s.charAt(i++);
                if (c == ESCAPED_NUL) {
                    c = 0;
                } else if (c == ESCAPED_ESCAPE) {
                    c = 1;
                } else if (c == ESCAPED_QUOTE) {
                    c = '\'';
                } else {
                    throw new IllegalArgumentException(
                        "invalid string passed to decoder: " + j);
                }
            }
            // undo shift
            result[j++] = (byte) ((c + shift) & 0xff);
        }
        // provide array of correct length
        if (result.length != j) {
            result = byteCopy(result, 0, j, new byte[j]);
        }
        return result;
    }

    /**
     * Decodes a string in SQLite3 blob notation. The caller has already
     * verified that the string starts with X' and ends with '.
     * @param s the blob literal.
     * @return the decoded bytes.
     * @throws IllegalArgumentException when the number of digits is odd
     *    or a character between the quotes is not a hex digit.
     */
    private static byte[] decodeX(String s) {
        int nDigits = s.length() - 3;
        if ((nDigits & 1) != 0) {
            throw new IllegalArgumentException(
                "invalid string passed to decoder: odd number of hex digits");
        }
        byte[] result = new byte[nDigits / 2];
        for (int i = 2, k = 0; k < result.length; i += 2, k++) {
            int hi = hexValue(s.charAt(i));
            int lo = hexValue(s.charAt(i + 1));
            if (hi < 0 || lo < 0) {
                throw new IllegalArgumentException(
                    "invalid string passed to decoder: bad hex digit at " + k);
            }
            result[k] = (byte) ((hi << 4) | lo);
        }
        return result;
    }

    /**
     * Returns the numeric value of a single ASCII hex digit.
     * @param c the character to convert.
     * @return a value in the range 0..15, or -1 when c is not a hex digit.
     */
    private static int hexValue(char c) {
        if (c >= '0' && c <= '9') {
            return c - '0';
        }
        if (c >= 'A' && c <= 'F') {
            return c - 'A' + 10;
        }
        if (c >= 'a' && c <= 'f') {
            return c - 'a' + 10;
        }
        return -1;
    }

    /**
     * Copies count elements from source, starting at element with
     * index offset, to the beginning of the given target.
     * @param source the source.
     * @param offset the offset.
     * @param count the number of elements to be copied.
     * @param target the target to be returned.
     * @return the target being copied to.
     */
    private static byte[] byteCopy(byte[] source, int offset,
                                   int count, byte[] target) {
        System.arraycopy(source, offset, target, 0, count);
        return target;
    }

    /** Upper case hex digits, indexed by nibble value. */
    static final char[] xdigits = {
        '0', '1', '2', '3', '4', '5', '6', '7',
        '8', '9', 'A', 'B', 'C', 'D', 'E', 'F'
    };

    /**
     * Encodes the given byte array into SQLite3 blob notation, ie X'..'
     * @param a the byte array to be encoded. A null reference is handled as
     *     an empty array.
     * @return the encoded bytes as a string: two upper case hex digits
     *     per input byte, wrapped in X' and '.
     */
    public static String encodeX(byte[] a) {
        // check input
        if (a == null || a.length == 0) {
            return "X''";
        }
        // two digits per byte plus the three characters of X''
        StringBuffer out = new StringBuffer(2 * a.length + 3);
        out.append('X');
        out.append('\'');
        for (int i = 0; i < a.length; i++) {
            // mask first: bytes are signed, a plain >> would sign-extend
            // and yield a negative index for values of 0x80 and above
            int b = a[i] & 0xff;
            out.append(xdigits[b >> 4]);
            out.append(xdigits[b & 0x0F]);
        }
        out.append('\'');
        return out.toString();
    }
}
202