1/*
2 * Copyright (C) 2013, The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *     http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#ifndef LATINIME_BYTE_ARRAY_UTILS_H
18#define LATINIME_BYTE_ARRAY_UTILS_H
19
20#include <stdint.h>
21
22#include "defines.h"
23
24namespace latinime {
25
26/**
27 * Utility methods for reading byte arrays.
28 */
29class ByteArrayUtils {
30 public:
31    /**
32     * Integer writing
33     *
34     * Each method write a corresponding size integer in a big endian manner.
35     */
36    static AK_FORCE_INLINE void writeUintAndAdvancePosition(uint8_t *const buffer,
37            const uint32_t data, const int size, int *const pos) {
38        // size must be in 1 to 4.
39        ASSERT(size >= 1 && size <= 4);
40        switch (size) {
41            case 1:
42                ByteArrayUtils::writeUint8AndAdvancePosition(buffer, data, pos);
43                return;
44            case 2:
45                ByteArrayUtils::writeUint16AndAdvancePosition(buffer, data, pos);
46                return;
47            case 3:
48                ByteArrayUtils::writeUint24AndAdvancePosition(buffer, data, pos);
49                return;
50            case 4:
51                ByteArrayUtils::writeUint32AndAdvancePosition(buffer, data, pos);
52                return;
53            default:
54                break;
55        }
56    }
57
58    /**
59     * Integer reading
60     *
61     * Each method read a corresponding size integer in a big endian manner.
62     */
63    static AK_FORCE_INLINE uint32_t readUint32(const uint8_t *const buffer, const int pos) {
64        return (buffer[pos] << 24) ^ (buffer[pos + 1] << 16)
65                ^ (buffer[pos + 2] << 8) ^ buffer[pos + 3];
66    }
67
68    static AK_FORCE_INLINE uint32_t readUint24(const uint8_t *const buffer, const int pos) {
69        return (buffer[pos] << 16) ^ (buffer[pos + 1] << 8) ^ buffer[pos + 2];
70    }
71
72    static AK_FORCE_INLINE uint16_t readUint16(const uint8_t *const buffer, const int pos) {
73        return (buffer[pos] << 8) ^ buffer[pos + 1];
74    }
75
76    static AK_FORCE_INLINE uint8_t readUint8(const uint8_t *const buffer, const int pos) {
77        return buffer[pos];
78    }
79
80    static AK_FORCE_INLINE uint32_t readUint32AndAdvancePosition(
81            const uint8_t *const buffer, int *const pos) {
82        const uint32_t value = readUint32(buffer, *pos);
83        *pos += 4;
84        return value;
85    }
86
87    static AK_FORCE_INLINE int readSint24AndAdvancePosition(
88            const uint8_t *const buffer, int *const pos) {
89        const uint8_t value = readUint8(buffer, *pos);
90        if (value < 0x80) {
91            return readUint24AndAdvancePosition(buffer, pos);
92        } else {
93            (*pos)++;
94            return -(((value & 0x7F) << 16) ^ readUint16AndAdvancePosition(buffer, pos));
95        }
96    }
97
98    static AK_FORCE_INLINE uint32_t readUint24AndAdvancePosition(
99            const uint8_t *const buffer, int *const pos) {
100        const uint32_t value = readUint24(buffer, *pos);
101        *pos += 3;
102        return value;
103    }
104
105    static AK_FORCE_INLINE uint16_t readUint16AndAdvancePosition(
106            const uint8_t *const buffer, int *const pos) {
107        const uint16_t value = readUint16(buffer, *pos);
108        *pos += 2;
109        return value;
110    }
111
112    static AK_FORCE_INLINE uint8_t readUint8AndAdvancePosition(
113            const uint8_t *const buffer, int *const pos) {
114        return buffer[(*pos)++];
115    }
116
117    /**
118     * Code Point Reading
119     *
120     * 1 byte = bbbbbbbb match
121     * case 000xxxxx: xxxxx << 16 + next byte << 8 + next byte
122     * else: if 00011111 (= 0x1F) : this is the terminator. This is a relevant choice because
123     *       unicode code points range from 0 to 0x10FFFF, so any 3-byte value starting with
124     *       00011111 would be outside unicode.
125     * else: iso-latin-1 code
126     * This allows for the whole unicode range to be encoded, including chars outside of
127     * the BMP. Also everything in the iso-latin-1 charset is only 1 byte, except control
128     * characters which should never happen anyway (and still work, but take 3 bytes).
129     */
130    static AK_FORCE_INLINE int readCodePoint(const uint8_t *const buffer, const int pos) {
131        int p = pos;
132        return readCodePointAndAdvancePosition(buffer, &p);
133    }
134
135    static AK_FORCE_INLINE int readCodePointAndAdvancePosition(
136            const uint8_t *const buffer, int *const pos) {
137        const uint8_t firstByte = readUint8(buffer, *pos);
138        if (firstByte < MINIMUM_ONE_BYTE_CHARACTER_VALUE) {
139            if (firstByte == CHARACTER_ARRAY_TERMINATOR) {
140                *pos += 1;
141                return NOT_A_CODE_POINT;
142            } else {
143                return readUint24AndAdvancePosition(buffer, pos);
144            }
145        } else {
146            *pos += 1;
147            return firstByte;
148        }
149    }
150
151    /**
152     * String (array of code points) Reading
153     *
154     * Reads code points until the terminator is found.
155     */
156    // Returns the length of the string.
157    static int readStringAndAdvancePosition(const uint8_t *const buffer,
158            const int maxLength, int *const outBuffer, int *const pos) {
159        int length = 0;
160        int codePoint = readCodePointAndAdvancePosition(buffer, pos);
161        while (NOT_A_CODE_POINT != codePoint && length < maxLength) {
162            outBuffer[length++] = codePoint;
163            codePoint = readCodePointAndAdvancePosition(buffer, pos);
164        }
165        return length;
166    }
167
168    // Advances the position and returns the length of the string.
169    static int advancePositionToBehindString(
170            const uint8_t *const buffer, const int maxLength, int *const pos) {
171        int length = 0;
172        int codePoint = readCodePointAndAdvancePosition(buffer, pos);
173        while (NOT_A_CODE_POINT != codePoint && length < maxLength) {
174            codePoint = readCodePointAndAdvancePosition(buffer, pos);
175            length++;
176        }
177        return length;
178    }
179
180    /**
181     * String (array of code points) Writing
182     */
183    static void writeCodePointsAndAdvancePosition(uint8_t *const buffer,
184            const int *const codePoints, const int codePointCount, const bool writesTerminator,
185            int *const pos) {
186        for (int i = 0; i < codePointCount; ++i) {
187            const int codePoint = codePoints[i];
188            if (codePoint == NOT_A_CODE_POINT || codePoint == CHARACTER_ARRAY_TERMINATOR) {
189                break;
190            } else if (codePoint < MINIMUM_ONE_BYTE_CHARACTER_VALUE
191                    || codePoint > MAXIMUM_ONE_BYTE_CHARACTER_VALUE) {
192                // three bytes character.
193                writeUint24AndAdvancePosition(buffer, codePoint, pos);
194            } else {
195                // one byte character.
196                writeUint8AndAdvancePosition(buffer, codePoint, pos);
197            }
198        }
199        if (writesTerminator) {
200            writeUint8AndAdvancePosition(buffer, CHARACTER_ARRAY_TERMINATOR, pos);
201        }
202    }
203
204    static int calculateRequiredByteCountToStoreCodePoints(const int *const codePoints,
205            const int codePointCount, const bool writesTerminator) {
206        int byteCount = 0;
207        for (int i = 0; i < codePointCount; ++i) {
208            const int codePoint = codePoints[i];
209            if (codePoint == NOT_A_CODE_POINT || codePoint == CHARACTER_ARRAY_TERMINATOR) {
210                break;
211            } else if (codePoint < MINIMUM_ONE_BYTE_CHARACTER_VALUE
212                    || codePoint > MAXIMUM_ONE_BYTE_CHARACTER_VALUE) {
213                // three bytes character.
214                byteCount += 3;
215            } else {
216                // one byte character.
217                byteCount += 1;
218            }
219        }
220        if (writesTerminator) {
221            // The terminator is one byte.
222            byteCount += 1;
223        }
224        return byteCount;
225    }
226
227 private:
228    DISALLOW_IMPLICIT_CONSTRUCTORS(ByteArrayUtils);
229
230    static const uint8_t MINIMUM_ONE_BYTE_CHARACTER_VALUE;
231    static const uint8_t MAXIMUM_ONE_BYTE_CHARACTER_VALUE;
232    static const uint8_t CHARACTER_ARRAY_TERMINATOR;
233
234    static AK_FORCE_INLINE void writeUint32AndAdvancePosition(uint8_t *const buffer,
235            const uint32_t data, int *const pos) {
236        buffer[(*pos)++] = (data >> 24) & 0xFF;
237        buffer[(*pos)++] = (data >> 16) & 0xFF;
238        buffer[(*pos)++] = (data >> 8) & 0xFF;
239        buffer[(*pos)++] = data & 0xFF;
240    }
241
242    static AK_FORCE_INLINE void writeUint24AndAdvancePosition(uint8_t *const buffer,
243            const uint32_t data, int *const pos) {
244        buffer[(*pos)++] = (data >> 16) & 0xFF;
245        buffer[(*pos)++] = (data >> 8) & 0xFF;
246        buffer[(*pos)++] = data & 0xFF;
247    }
248
249    static AK_FORCE_INLINE void writeUint16AndAdvancePosition(uint8_t *const buffer,
250            const uint16_t data, int *const pos) {
251        buffer[(*pos)++] = (data >> 8) & 0xFF;
252        buffer[(*pos)++] = data & 0xFF;
253    }
254
255    static AK_FORCE_INLINE void writeUint8AndAdvancePosition(uint8_t *const buffer,
256            const uint8_t data, int *const pos) {
257        buffer[(*pos)++] = data & 0xFF;
258    }
259};
260} // namespace latinime
261#endif /* LATINIME_BYTE_ARRAY_UTILS_H */
262