1/*
2 * Copyright (C) 2015 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#include <fcntl.h>
18#include <stdlib.h>
19#include <sys/mman.h>
20
21extern "C" {
22    #include <fec.h>
23}
24
25#include "fec_private.h"
26
27using rs_unique_ptr = std::unique_ptr<void, decltype(&free_rs_char)>;
28
29/* prints a hexdump of `data' using warn(...) */
30static void dump(const char *name, uint64_t value, const uint8_t *data,
31        size_t size)
32{
33    const int bytes_per_line = 16;
34    char hex[bytes_per_line * 3 + 1];
35    char prn[bytes_per_line + 1];
36
37    warn("%s (%" PRIu64 ") (%zu bytes):", name ? name : "", value, size);
38
39    if (!data) {
40        warn("    (null)");
41        return;
42    }
43
44    for (size_t n = 0; n < size; n += bytes_per_line) {
45        memset(hex, 0, sizeof(hex));
46        memset(prn, 0, sizeof(prn));
47
48        for (size_t m = 0; m < bytes_per_line; ++m) {
49            if (n + m < size) {
50                ptrdiff_t offset = &hex[m * 3] - hex;
51                snprintf(hex + offset, sizeof(hex) - offset, "%02x ",
52                         data[n + m]);
53
54                if (isprint(data[n + m])) {
55                    prn[m] = data[n + m];
56                } else {
57                    prn[m] = '.';
58                }
59            } else {
60                strcpy(&hex[m * 3], "   ");
61            }
62        }
63
64        warn("    %04zu   %s  %s", n, hex, prn);
65    }
66}
67
68/* checks if `offset' is within a corrupted block */
69static inline bool is_erasure(fec_handle *f, uint64_t offset,
70        const uint8_t *data)
71{
72    if (unlikely(offset >= f->data_size)) {
73        return false;
74    }
75
76    /* ideally, we would like to know if a specific byte on this block has
77       been corrupted, but knowing whether any of them is can be useful as
78       well, because often the entire block is corrupted */
79
80    uint64_t n = offset / FEC_BLOCKSIZE;
81
82    return !verity_check_block(f, &f->verity.hash[n * SHA256_DIGEST_LENGTH],
83                data);
84}
85
86/* check if `offset' is within a block expected to contain zeros */
87static inline bool is_zero(fec_handle *f, uint64_t offset)
88{
89    verity_info *v = &f->verity;
90
91    if (!v->hash || unlikely(offset >= f->data_size)) {
92        return false;
93    }
94
95    uint64_t hash_offset = (offset / FEC_BLOCKSIZE) * SHA256_DIGEST_LENGTH;
96
97    if (unlikely(hash_offset >
98            v->hash_data_blocks * FEC_BLOCKSIZE - SHA256_DIGEST_LENGTH)) {
99        return false;
100    }
101
102    return !memcmp(v->zero_hash, &v->hash[hash_offset], SHA256_DIGEST_LENGTH);
103}
104
105/* reads and decodes a single block starting from `offset', returns the number
106   of bytes corrected in `errors' */
107static int __ecc_read(fec_handle *f, void *rs, uint8_t *dest, uint64_t offset,
108        bool use_erasures, uint8_t *ecc_data, size_t *errors)
109{
110    check(offset % FEC_BLOCKSIZE == 0);
111    ecc_info *e = &f->ecc;
112
113    /* reverse interleaving: calculate the RS block that includes the requested
114       offset */
115    uint64_t rsb = offset - (offset / (e->rounds * FEC_BLOCKSIZE)) *
116                        e->rounds * FEC_BLOCKSIZE;
117    int data_index = -1;
118    int erasures[e->rsn];
119    int neras = 0;
120
121    /* verity is required to check for erasures */
122    check(!use_erasures || f->verity.hash);
123
124    for (int i = 0; i < e->rsn; ++i) {
125        uint64_t interleaved = fec_ecc_interleave(rsb * e->rsn + i, e->rsn,
126                                    e->rounds);
127
128        if (interleaved == offset) {
129            data_index = i;
130        }
131
132        /* to improve our chances of correcting IO errors, initialize the
133           buffer to zeros even if we are going to read to it later */
134        uint8_t bbuf[FEC_BLOCKSIZE] = {0};
135
136        if (likely(interleaved < e->start) && !is_zero(f, interleaved)) {
137            /* copy raw data to reconstruct the RS block */
138            if (!raw_pread(f, bbuf, FEC_BLOCKSIZE, interleaved)) {
139                warn("failed to read: %s", strerror(errno));
140
141                /* treat errors as corruption */
142                if (use_erasures && neras <= e->roots) {
143                    erasures[neras++] = i;
144                }
145            } else if (use_erasures && neras <= e->roots &&
146                            is_erasure(f, interleaved, bbuf)) {
147                erasures[neras++] = i;
148            }
149        }
150
151        for (int j = 0; j < FEC_BLOCKSIZE; ++j) {
152            ecc_data[j * FEC_RSM + i] = bbuf[j];
153        }
154    }
155
156    check(data_index >= 0);
157
158    size_t nerrs = 0;
159    uint8_t copy[FEC_RSM];
160
161    for (int i = 0; i < FEC_BLOCKSIZE; ++i) {
162        /* copy parity data */
163        if (!raw_pread(f, &ecc_data[i * FEC_RSM + e->rsn], e->roots,
164                e->start + (i + rsb) * e->roots)) {
165            error("failed to read ecc data: %s", strerror(errno));
166            return -1;
167        }
168
169        /* for debugging decoding failures, because decode_rs_char can mangle
170           ecc_data */
171        if (unlikely(use_erasures)) {
172            memcpy(copy, &ecc_data[i * FEC_RSM], FEC_RSM);
173        }
174
175        /* decode */
176        int rc = decode_rs_char(rs, &ecc_data[i * FEC_RSM], erasures, neras);
177
178        if (unlikely(rc < 0)) {
179            if (use_erasures) {
180                error("RS block %" PRIu64 ": decoding failed (%d erasures)",
181                    rsb, neras);
182                dump("raw RS block", rsb, copy, FEC_RSM);
183            } else if (!f->verity.hash) {
184                warn("RS block %" PRIu64 ": decoding failed", rsb);
185            } else {
186                debug("RS block %" PRIu64 ": decoding failed", rsb);
187            }
188
189            errno = EIO;
190            return -1;
191        } else if (unlikely(rc > 0)) {
192            check(rc <= (use_erasures ? e->roots : e->roots / 2));
193            nerrs += rc;
194        }
195
196        dest[i] = ecc_data[i * FEC_RSM + data_index];
197    }
198
199    if (nerrs) {
200        warn("RS block %" PRIu64 ": corrected %zu errors", rsb, nerrs);
201        *errors += nerrs;
202    }
203
204    return FEC_BLOCKSIZE;
205}
206
207/* initializes RS decoder and allocates memory for interleaving */
208static int ecc_init(fec_handle *f, rs_unique_ptr& rs,
209        std::unique_ptr<uint8_t[]>& ecc_data)
210{
211    check(f);
212
213    rs.reset(init_rs_char(FEC_PARAMS(f->ecc.roots)));
214
215    if (unlikely(!rs)) {
216        error("failed to initialize RS");
217        errno = ENOMEM;
218        return -1;
219    }
220
221    ecc_data.reset(new (std::nothrow) uint8_t[FEC_RSM * FEC_BLOCKSIZE]);
222
223    if (unlikely(!ecc_data)) {
224        error("failed to allocate ecc buffer");
225        errno = ENOMEM;
226        return -1;
227    }
228
229    return 0;
230}
231
232/* reads `count' bytes from `offset' and corrects possible errors without
233   erasure detection, returning the number of corrected bytes in `errors' */
234static ssize_t ecc_read(fec_handle *f, uint8_t *dest, size_t count,
235        uint64_t offset, size_t *errors)
236{
237    check(f);
238    check(dest);
239    check(offset < f->data_size);
240    check(offset + count <= f->data_size);
241    check(errors);
242
243    debug("[%" PRIu64 ", %" PRIu64 ")", offset, offset + count);
244
245    rs_unique_ptr rs(NULL, free_rs_char);
246    std::unique_ptr<uint8_t[]> ecc_data;
247
248    if (ecc_init(f, rs, ecc_data) == -1) {
249        return -1;
250    }
251
252    uint64_t curr = offset / FEC_BLOCKSIZE;
253    size_t coff = (size_t)(offset - curr * FEC_BLOCKSIZE);
254    size_t left = count;
255
256    uint8_t data[FEC_BLOCKSIZE];
257
258    while (left > 0) {
259        /* there's no erasure detection without verity metadata */
260        if (__ecc_read(f, rs.get(), data, curr * FEC_BLOCKSIZE, false,
261                ecc_data.get(), errors) == -1) {
262            return -1;
263        }
264
265        size_t copy = FEC_BLOCKSIZE - coff;
266
267        if (copy > left) {
268            copy = left;
269        }
270
271        memcpy(dest, &data[coff], copy);
272
273        dest += copy;
274        left -= copy;
275        coff = 0;
276        ++curr;
277    }
278
279    return count;
280}
281
282/* reads `count' bytes from `offset', corrects possible errors with
283   erasure detection, and verifies the integrity of read data using
284   verity hash tree; returns the number of corrections in `errors' */
285static ssize_t verity_read(fec_handle *f, uint8_t *dest, size_t count,
286        uint64_t offset, size_t *errors)
287{
288    check(f);
289    check(dest);
290    check(offset < f->data_size);
291    check(offset + count <= f->data_size);
292    check(f->verity.hash);
293    check(errors);
294
295    debug("[%" PRIu64 ", %" PRIu64 ")", offset, offset + count);
296
297    rs_unique_ptr rs(NULL, free_rs_char);
298    std::unique_ptr<uint8_t[]> ecc_data;
299
300    if (f->ecc.start && ecc_init(f, rs, ecc_data) == -1) {
301        return -1;
302    }
303
304    uint64_t curr = offset / FEC_BLOCKSIZE;
305    size_t coff = (size_t)(offset - curr * FEC_BLOCKSIZE);
306    size_t left = count;
307    uint8_t data[FEC_BLOCKSIZE];
308
309    uint64_t max_hash_block = (f->verity.hash_data_blocks * FEC_BLOCKSIZE -
310                                SHA256_DIGEST_LENGTH) / SHA256_DIGEST_LENGTH;
311
312    while (left > 0) {
313        check(curr <= max_hash_block);
314
315        uint8_t *hash = &f->verity.hash[curr * SHA256_DIGEST_LENGTH];
316        uint64_t curr_offset = curr * FEC_BLOCKSIZE;
317
318        bool expect_zeros = is_zero(f, curr_offset);
319
320        /* if we are in read-only mode and expect to read a zero block,
321           skip reading and just return zeros */
322        if (f->mode & O_RDONLY && expect_zeros) {
323            memset(data, 0, FEC_BLOCKSIZE);
324            goto valid;
325        }
326
327        /* copy raw data without error correction */
328        if (!raw_pread(f, data, FEC_BLOCKSIZE, curr_offset)) {
329            error("failed to read: %s", strerror(errno));
330            return -1;
331        }
332
333        if (likely(verity_check_block(f, hash, data))) {
334            goto valid;
335        }
336
337        /* we know the block is supposed to contain zeros, so return zeros
338           instead of trying to correct it */
339        if (expect_zeros) {
340            memset(data, 0, FEC_BLOCKSIZE);
341            goto corrected;
342        }
343
344        if (!f->ecc.start) {
345            /* fatal error without ecc */
346            error("[%" PRIu64 ", %" PRIu64 "): corrupted block %" PRIu64,
347                offset, offset + count, curr);
348            return -1;
349        } else {
350            debug("[%" PRIu64 ", %" PRIu64 "): corrupted block %" PRIu64,
351                offset, offset + count, curr);
352        }
353
354        /* try to correct without erasures first, because checking for
355           erasure locations is slower */
356        if (__ecc_read(f, rs.get(), data, curr_offset, false, ecc_data.get(),
357                errors) == FEC_BLOCKSIZE &&
358            verity_check_block(f, hash, data)) {
359            goto corrected;
360        }
361
362        /* try to correct with erasures */
363        if (__ecc_read(f, rs.get(), data, curr_offset, true, ecc_data.get(),
364                errors) == FEC_BLOCKSIZE &&
365            verity_check_block(f, hash, data)) {
366            goto corrected;
367        }
368
369        error("[%" PRIu64 ", %" PRIu64 "): corrupted block %" PRIu64
370            " (offset %" PRIu64 ") cannot be recovered",
371            offset, offset + count, curr, curr_offset);
372        dump("decoded block", curr, data, FEC_BLOCKSIZE);
373
374        errno = EIO;
375        return -1;
376
377corrected:
378        /* update the corrected block to the file if we are in r/w mode */
379        if (f->mode & O_RDWR &&
380                !raw_pwrite(f, data, FEC_BLOCKSIZE, curr_offset)) {
381            error("failed to write: %s", strerror(errno));
382            return -1;
383        }
384
385valid:
386        size_t copy = FEC_BLOCKSIZE - coff;
387
388        if (copy > left) {
389            copy = left;
390        }
391
392        memcpy(dest, &data[coff], copy);
393
394        dest += copy;
395        left -= copy;
396        coff = 0;
397        ++curr;
398    }
399
400    return count;
401}
402
403/* sets the internal file position to `offset' relative to `whence' */
404int fec_seek(struct fec_handle *f, int64_t offset, int whence)
405{
406    check(f);
407
408    if (whence == SEEK_SET) {
409        if (offset < 0) {
410            errno = EOVERFLOW;
411            return -1;
412        }
413
414        f->pos = offset;
415    } else if (whence == SEEK_CUR) {
416        if (offset < 0 && f->pos < (uint64_t)-offset) {
417            errno = EOVERFLOW;
418            return -1;
419        } else if (offset > 0 && (uint64_t)offset > UINT64_MAX - f->pos) {
420            errno = EOVERFLOW;
421            return -1;
422        }
423
424        f->pos += offset;
425    } else if (whence == SEEK_END) {
426        if (offset >= 0) {
427            errno = ENXIO;
428            return -1;
429        } else if ((uint64_t)-offset > f->size) {
430            errno = EOVERFLOW;
431            return -1;
432        }
433
434        f->pos = f->size + offset;
435    } else {
436        errno = EINVAL;
437        return -1;
438    }
439
440    return 0;
441}
442
443/* reads up to `count' bytes starting from the internal file position using
444   error correction and integrity validation, if available */
445ssize_t fec_read(struct fec_handle *f, void *buf, size_t count)
446{
447    ssize_t rc = fec_pread(f, buf, count, f->pos);
448
449    if (rc > 0) {
450        check(f->pos < UINT64_MAX - rc);
451        f->pos += rc;
452    }
453
454    return rc;
455}
456
457/* for a file with size `max', returns the number of bytes we can read starting
458   from `offset', up to `count' bytes */
459static inline size_t get_max_count(uint64_t offset, size_t count, uint64_t max)
460{
461    if (offset >= max) {
462        return 0;
463    } else if (offset > max - count) {
464        return (size_t)(max - offset);
465    }
466
467    return count;
468}
469
470/* reads `count' bytes from `f->fd' starting from `offset', and copies the
471   data to `buf' */
472bool raw_pread(fec_handle *f, void *buf, size_t count, uint64_t offset)
473{
474    check(f);
475    check(buf);
476
477    uint8_t *p = (uint8_t *)buf;
478    size_t remaining = count;
479
480    while (remaining > 0) {
481        ssize_t n = TEMP_FAILURE_RETRY(pread64(f->fd, p, remaining, offset));
482
483        if (n <= 0) {
484            return false;
485        }
486
487        p += n;
488        remaining -= n;
489        offset += n;
490    }
491
492    return true;
493}
494
495/* writes `count' bytes from `buf' to `f->fd' to a file position `offset' */
496bool raw_pwrite(fec_handle *f, const void *buf, size_t count, uint64_t offset)
497{
498    check(f);
499    check(buf);
500
501    const uint8_t *p = (const uint8_t *)buf;
502    size_t remaining = count;
503
504    while (remaining > 0) {
505        ssize_t n = TEMP_FAILURE_RETRY(pwrite64(f->fd, p, remaining, offset));
506
507        if (n <= 0) {
508            return false;
509        }
510
511        p += n;
512        remaining -= n;
513        offset += n;
514    }
515
516    return true;
517}
518
519/* reads up to `count' bytes starting from `offset' using error correction and
520   integrity validation, if available */
521ssize_t fec_pread(struct fec_handle *f, void *buf, size_t count,
522        uint64_t offset)
523{
524    check(f);
525    check(buf);
526
527    if (unlikely(offset > UINT64_MAX - count)) {
528        errno = EOVERFLOW;
529        return -1;
530    }
531
532    if (f->verity.hash) {
533        return process(f, (uint8_t *)buf,
534                    get_max_count(offset, count, f->data_size), offset,
535                    verity_read);
536    } else if (f->ecc.start) {
537        check(f->ecc.start < f->size);
538
539        count = get_max_count(offset, count, f->data_size);
540        ssize_t rc = process(f, (uint8_t *)buf, count, offset, ecc_read);
541
542        if (rc >= 0) {
543            return rc;
544        }
545
546        /* return raw data if pure ecc read fails; due to interleaving
547           the specific blocks the caller wants may still be fine */
548    } else {
549        count = get_max_count(offset, count, f->size);
550    }
551
552    if (raw_pread(f, buf, count, offset)) {
553        return count;
554    }
555
556    return -1;
557}
558