1/* 2 * Copyright (C) 2015 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17#include <fcntl.h> 18#include <stdlib.h> 19#include <sys/mman.h> 20 21extern "C" { 22 #include <fec.h> 23} 24 25#include "fec_private.h" 26 27using rs_unique_ptr = std::unique_ptr<void, decltype(&free_rs_char)>; 28 29/* prints a hexdump of `data' using warn(...) */ 30static void dump(const char *name, uint64_t value, const uint8_t *data, 31 size_t size) 32{ 33 const int bytes_per_line = 16; 34 char hex[bytes_per_line * 3 + 1]; 35 char prn[bytes_per_line + 1]; 36 37 warn("%s (%" PRIu64 ") (%zu bytes):", name ? name : "", value, size); 38 39 if (!data) { 40 warn(" (null)"); 41 return; 42 } 43 44 for (size_t n = 0; n < size; n += bytes_per_line) { 45 memset(hex, 0, sizeof(hex)); 46 memset(prn, 0, sizeof(prn)); 47 48 for (size_t m = 0; m < bytes_per_line; ++m) { 49 if (n + m < size) { 50 ptrdiff_t offset = &hex[m * 3] - hex; 51 snprintf(hex + offset, sizeof(hex) - offset, "%02x ", 52 data[n + m]); 53 54 if (isprint(data[n + m])) { 55 prn[m] = data[n + m]; 56 } else { 57 prn[m] = '.'; 58 } 59 } else { 60 strcpy(&hex[m * 3], " "); 61 } 62 } 63 64 warn(" %04zu %s %s", n, hex, prn); 65 } 66} 67 68/* checks if `offset' is within a corrupted block */ 69static inline bool is_erasure(fec_handle *f, uint64_t offset, 70 const uint8_t *data) 71{ 72 if (unlikely(offset >= f->data_size)) { 73 return false; 74 } 75 76 /* ideally, we would like to know if a specific byte on this block has 77 been corrupted, but knowing whether any of them is can be useful as 78 well, because often the entire block is corrupted */ 79 80 uint64_t n = offset / FEC_BLOCKSIZE; 81 82 return !verity_check_block(f, &f->verity.hash[n * SHA256_DIGEST_LENGTH], 83 data); 84} 85 86/* check if `offset' is within a block expected to contain zeros */ 87static inline bool is_zero(fec_handle *f, uint64_t offset) 88{ 89 verity_info *v = &f->verity; 90 91 if (!v->hash || unlikely(offset >= f->data_size)) { 92 return false; 93 } 94 95 uint64_t hash_offset = (offset / FEC_BLOCKSIZE) * SHA256_DIGEST_LENGTH; 96 97 if (unlikely(hash_offset > 98 v->hash_data_blocks * FEC_BLOCKSIZE - SHA256_DIGEST_LENGTH)) { 99 return false; 100 } 101 102 return !memcmp(v->zero_hash, &v->hash[hash_offset], SHA256_DIGEST_LENGTH); 103} 104 105/* reads and decodes a single block starting from `offset', returns the number 106 of bytes corrected in `errors' */ 107static int __ecc_read(fec_handle *f, void *rs, uint8_t *dest, uint64_t offset, 108 bool use_erasures, uint8_t *ecc_data, size_t *errors) 109{ 110 check(offset % FEC_BLOCKSIZE == 0); 111 ecc_info *e = &f->ecc; 112 113 /* reverse interleaving: calculate the RS block that includes the requested 114 offset */ 115 uint64_t rsb = offset - (offset / (e->rounds * FEC_BLOCKSIZE)) * 116 e->rounds * FEC_BLOCKSIZE; 117 int data_index = -1; 118 int erasures[e->rsn]; 119 int neras = 0; 120 121 /* verity is required to check for erasures */ 122 check(!use_erasures || f->verity.hash); 123 124 for (int i = 0; i < e->rsn; ++i) { 125 uint64_t interleaved = fec_ecc_interleave(rsb * e->rsn + i, e->rsn, 126 e->rounds); 127 128 if (interleaved == offset) { 129 data_index = i; 130 } 131 132 /* to improve our chances of correcting IO errors, initialize the 133 buffer to zeros even if we are going to read to it later */ 134 uint8_t bbuf[FEC_BLOCKSIZE] = {0}; 135 136 if (likely(interleaved < e->start) && !is_zero(f, interleaved)) { 137 /* copy raw data to reconstruct the RS block */ 138 if (!raw_pread(f, bbuf, FEC_BLOCKSIZE, interleaved)) { 139 warn("failed to read: %s", strerror(errno)); 140 141 /* treat errors as corruption */ 142 if (use_erasures && neras <= e->roots) { 143 erasures[neras++] = i; 144 } 145 } else if (use_erasures && neras <= e->roots && 146 is_erasure(f, interleaved, bbuf)) { 147 erasures[neras++] = i; 148 } 149 } 150 151 for (int j = 0; j < FEC_BLOCKSIZE; ++j) { 152 ecc_data[j * FEC_RSM + i] = bbuf[j]; 153 } 154 } 155 156 check(data_index >= 0); 157 158 size_t nerrs = 0; 159 uint8_t copy[FEC_RSM]; 160 161 for (int i = 0; i < FEC_BLOCKSIZE; ++i) { 162 /* copy parity data */ 163 if (!raw_pread(f, &ecc_data[i * FEC_RSM + e->rsn], e->roots, 164 e->start + (i + rsb) * e->roots)) { 165 error("failed to read ecc data: %s", strerror(errno)); 166 return -1; 167 } 168 169 /* for debugging decoding failures, because decode_rs_char can mangle 170 ecc_data */ 171 if (unlikely(use_erasures)) { 172 memcpy(copy, &ecc_data[i * FEC_RSM], FEC_RSM); 173 } 174 175 /* decode */ 176 int rc = decode_rs_char(rs, &ecc_data[i * FEC_RSM], erasures, neras); 177 178 if (unlikely(rc < 0)) { 179 if (use_erasures) { 180 error("RS block %" PRIu64 ": decoding failed (%d erasures)", 181 rsb, neras); 182 dump("raw RS block", rsb, copy, FEC_RSM); 183 } else if (!f->verity.hash) { 184 warn("RS block %" PRIu64 ": decoding failed", rsb); 185 } else { 186 debug("RS block %" PRIu64 ": decoding failed", rsb); 187 } 188 189 errno = EIO; 190 return -1; 191 } else if (unlikely(rc > 0)) { 192 check(rc <= (use_erasures ? e->roots : e->roots / 2)); 193 nerrs += rc; 194 } 195 196 dest[i] = ecc_data[i * FEC_RSM + data_index]; 197 } 198 199 if (nerrs) { 200 warn("RS block %" PRIu64 ": corrected %zu errors", rsb, nerrs); 201 *errors += nerrs; 202 } 203 204 return FEC_BLOCKSIZE; 205} 206 207/* initializes RS decoder and allocates memory for interleaving */ 208static int ecc_init(fec_handle *f, rs_unique_ptr& rs, 209 std::unique_ptr<uint8_t[]>& ecc_data) 210{ 211 check(f); 212 213 rs.reset(init_rs_char(FEC_PARAMS(f->ecc.roots))); 214 215 if (unlikely(!rs)) { 216 error("failed to initialize RS"); 217 errno = ENOMEM; 218 return -1; 219 } 220 221 ecc_data.reset(new (std::nothrow) uint8_t[FEC_RSM * FEC_BLOCKSIZE]); 222 223 if (unlikely(!ecc_data)) { 224 error("failed to allocate ecc buffer"); 225 errno = ENOMEM; 226 return -1; 227 } 228 229 return 0; 230} 231 232/* reads `count' bytes from `offset' and corrects possible errors without 233 erasure detection, returning the number of corrected bytes in `errors' */ 234static ssize_t ecc_read(fec_handle *f, uint8_t *dest, size_t count, 235 uint64_t offset, size_t *errors) 236{ 237 check(f); 238 check(dest); 239 check(offset < f->data_size); 240 check(offset + count <= f->data_size); 241 check(errors); 242 243 debug("[%" PRIu64 ", %" PRIu64 ")", offset, offset + count); 244 245 rs_unique_ptr rs(NULL, free_rs_char); 246 std::unique_ptr<uint8_t[]> ecc_data; 247 248 if (ecc_init(f, rs, ecc_data) == -1) { 249 return -1; 250 } 251 252 uint64_t curr = offset / FEC_BLOCKSIZE; 253 size_t coff = (size_t)(offset - curr * FEC_BLOCKSIZE); 254 size_t left = count; 255 256 uint8_t data[FEC_BLOCKSIZE]; 257 258 while (left > 0) { 259 /* there's no erasure detection without verity metadata */ 260 if (__ecc_read(f, rs.get(), data, curr * FEC_BLOCKSIZE, false, 261 ecc_data.get(), errors) == -1) { 262 return -1; 263 } 264 265 size_t copy = FEC_BLOCKSIZE - coff; 266 267 if (copy > left) { 268 copy = left; 269 } 270 271 memcpy(dest, &data[coff], copy); 272 273 dest += copy; 274 left -= copy; 275 coff = 0; 276 ++curr; 277 } 278 279 return count; 280} 281 282/* reads `count' bytes from `offset', corrects possible errors with 283 erasure detection, and verifies the integrity of read data using 284 verity hash tree; returns the number of corrections in `errors' */ 285static ssize_t verity_read(fec_handle *f, uint8_t *dest, size_t count, 286 uint64_t offset, size_t *errors) 287{ 288 check(f); 289 check(dest); 290 check(offset < f->data_size); 291 check(offset + count <= f->data_size); 292 check(f->verity.hash); 293 check(errors); 294 295 debug("[%" PRIu64 ", %" PRIu64 ")", offset, offset + count); 296 297 rs_unique_ptr rs(NULL, free_rs_char); 298 std::unique_ptr<uint8_t[]> ecc_data; 299 300 if (f->ecc.start && ecc_init(f, rs, ecc_data) == -1) { 301 return -1; 302 } 303 304 uint64_t curr = offset / FEC_BLOCKSIZE; 305 size_t coff = (size_t)(offset - curr * FEC_BLOCKSIZE); 306 size_t left = count; 307 uint8_t data[FEC_BLOCKSIZE]; 308 309 uint64_t max_hash_block = (f->verity.hash_data_blocks * FEC_BLOCKSIZE - 310 SHA256_DIGEST_LENGTH) / SHA256_DIGEST_LENGTH; 311 312 while (left > 0) { 313 check(curr <= max_hash_block); 314 315 uint8_t *hash = &f->verity.hash[curr * SHA256_DIGEST_LENGTH]; 316 uint64_t curr_offset = curr * FEC_BLOCKSIZE; 317 318 bool expect_zeros = is_zero(f, curr_offset); 319 320 /* if we are in read-only mode and expect to read a zero block, 321 skip reading and just return zeros */ 322 if (f->mode & O_RDONLY && expect_zeros) { 323 memset(data, 0, FEC_BLOCKSIZE); 324 goto valid; 325 } 326 327 /* copy raw data without error correction */ 328 if (!raw_pread(f, data, FEC_BLOCKSIZE, curr_offset)) { 329 error("failed to read: %s", strerror(errno)); 330 return -1; 331 } 332 333 if (likely(verity_check_block(f, hash, data))) { 334 goto valid; 335 } 336 337 /* we know the block is supposed to contain zeros, so return zeros 338 instead of trying to correct it */ 339 if (expect_zeros) { 340 memset(data, 0, FEC_BLOCKSIZE); 341 goto corrected; 342 } 343 344 if (!f->ecc.start) { 345 /* fatal error without ecc */ 346 error("[%" PRIu64 ", %" PRIu64 "): corrupted block %" PRIu64, 347 offset, offset + count, curr); 348 return -1; 349 } else { 350 debug("[%" PRIu64 ", %" PRIu64 "): corrupted block %" PRIu64, 351 offset, offset + count, curr); 352 } 353 354 /* try to correct without erasures first, because checking for 355 erasure locations is slower */ 356 if (__ecc_read(f, rs.get(), data, curr_offset, false, ecc_data.get(), 357 errors) == FEC_BLOCKSIZE && 358 verity_check_block(f, hash, data)) { 359 goto corrected; 360 } 361 362 /* try to correct with erasures */ 363 if (__ecc_read(f, rs.get(), data, curr_offset, true, ecc_data.get(), 364 errors) == FEC_BLOCKSIZE && 365 verity_check_block(f, hash, data)) { 366 goto corrected; 367 } 368 369 error("[%" PRIu64 ", %" PRIu64 "): corrupted block %" PRIu64 370 " (offset %" PRIu64 ") cannot be recovered", 371 offset, offset + count, curr, curr_offset); 372 dump("decoded block", curr, data, FEC_BLOCKSIZE); 373 374 errno = EIO; 375 return -1; 376 377corrected: 378 /* update the corrected block to the file if we are in r/w mode */ 379 if (f->mode & O_RDWR && 380 !raw_pwrite(f, data, FEC_BLOCKSIZE, curr_offset)) { 381 error("failed to write: %s", strerror(errno)); 382 return -1; 383 } 384 385valid: 386 size_t copy = FEC_BLOCKSIZE - coff; 387 388 if (copy > left) { 389 copy = left; 390 } 391 392 memcpy(dest, &data[coff], copy); 393 394 dest += copy; 395 left -= copy; 396 coff = 0; 397 ++curr; 398 } 399 400 return count; 401} 402 403/* sets the internal file position to `offset' relative to `whence' */ 404int fec_seek(struct fec_handle *f, int64_t offset, int whence) 405{ 406 check(f); 407 408 if (whence == SEEK_SET) { 409 if (offset < 0) { 410 errno = EOVERFLOW; 411 return -1; 412 } 413 414 f->pos = offset; 415 } else if (whence == SEEK_CUR) { 416 if (offset < 0 && f->pos < (uint64_t)-offset) { 417 errno = EOVERFLOW; 418 return -1; 419 } else if (offset > 0 && (uint64_t)offset > UINT64_MAX - f->pos) { 420 errno = EOVERFLOW; 421 return -1; 422 } 423 424 f->pos += offset; 425 } else if (whence == SEEK_END) { 426 if (offset >= 0) { 427 errno = ENXIO; 428 return -1; 429 } else if ((uint64_t)-offset > f->size) { 430 errno = EOVERFLOW; 431 return -1; 432 } 433 434 f->pos = f->size + offset; 435 } else { 436 errno = EINVAL; 437 return -1; 438 } 439 440 return 0; 441} 442 443/* reads up to `count' bytes starting from the internal file position using 444 error correction and integrity validation, if available */ 445ssize_t fec_read(struct fec_handle *f, void *buf, size_t count) 446{ 447 ssize_t rc = fec_pread(f, buf, count, f->pos); 448 449 if (rc > 0) { 450 check(f->pos < UINT64_MAX - rc); 451 f->pos += rc; 452 } 453 454 return rc; 455} 456 457/* for a file with size `max', returns the number of bytes we can read starting 458 from `offset', up to `count' bytes */ 459static inline size_t get_max_count(uint64_t offset, size_t count, uint64_t max) 460{ 461 if (offset >= max) { 462 return 0; 463 } else if (offset > max - count) { 464 return (size_t)(max - offset); 465 } 466 467 return count; 468} 469 470/* reads `count' bytes from `f->fd' starting from `offset', and copies the 471 data to `buf' */ 472bool raw_pread(fec_handle *f, void *buf, size_t count, uint64_t offset) 473{ 474 check(f); 475 check(buf); 476 477 uint8_t *p = (uint8_t *)buf; 478 size_t remaining = count; 479 480 while (remaining > 0) { 481 ssize_t n = TEMP_FAILURE_RETRY(pread64(f->fd, p, remaining, offset)); 482 483 if (n <= 0) { 484 return false; 485 } 486 487 p += n; 488 remaining -= n; 489 offset += n; 490 } 491 492 return true; 493} 494 495/* writes `count' bytes from `buf' to `f->fd' to a file position `offset' */ 496bool raw_pwrite(fec_handle *f, const void *buf, size_t count, uint64_t offset) 497{ 498 check(f); 499 check(buf); 500 501 const uint8_t *p = (const uint8_t *)buf; 502 size_t remaining = count; 503 504 while (remaining > 0) { 505 ssize_t n = TEMP_FAILURE_RETRY(pwrite64(f->fd, p, remaining, offset)); 506 507 if (n <= 0) { 508 return false; 509 } 510 511 p += n; 512 remaining -= n; 513 offset += n; 514 } 515 516 return true; 517} 518 519/* reads up to `count' bytes starting from `offset' using error correction and 520 integrity validation, if available */ 521ssize_t fec_pread(struct fec_handle *f, void *buf, size_t count, 522 uint64_t offset) 523{ 524 check(f); 525 check(buf); 526 527 if (unlikely(offset > UINT64_MAX - count)) { 528 errno = EOVERFLOW; 529 return -1; 530 } 531 532 if (f->verity.hash) { 533 return process(f, (uint8_t *)buf, 534 get_max_count(offset, count, f->data_size), offset, 535 verity_read); 536 } else if (f->ecc.start) { 537 check(f->ecc.start < f->size); 538 539 count = get_max_count(offset, count, f->data_size); 540 ssize_t rc = process(f, (uint8_t *)buf, count, offset, ecc_read); 541 542 if (rc >= 0) { 543 return rc; 544 } 545 546 /* return raw data if pure ecc read fails; due to interleaving 547 the specific blocks the caller wants may still be fine */ 548 } else { 549 count = get_max_count(offset, count, f->size); 550 } 551 552 if (raw_pread(f, buf, count, offset)) { 553 return count; 554 } 555 556 return -1; 557} 558