cpu-features.c revision e3f21d482a5f8635c9e5584b44a7942d2b4ee8c7
1/* 2 * Copyright (C) 2010 The Android Open Source Project 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * * Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * * Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in 12 * the documentation and/or other materials provided with the 13 * distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 16 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 17 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 18 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 19 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 20 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 21 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS 22 * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 23 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 24 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 25 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26 * SUCH DAMAGE. 27 */ 28 29/* ChangeLog for this library: 30 * 31 * NDK r8d: Add android_setCpu(). 32 * 33 * NDK r8c: Add new ARM CPU features: VFPv2, VFP_D32, VFP_FP16, 34 * VFP_FMA, NEON_FMA, IDIV_ARM, IDIV_THUMB2 and iWMMXt. 35 * 36 * Rewrite the code to parse /proc/self/auxv instead of 37 * the "Features" field in /proc/cpuinfo. 38 * 39 * Dynamically allocate the buffer that hold the content 40 * of /proc/cpuinfo to deal with newer hardware. 41 * 42 * NDK r7c: Fix CPU count computation. 
The old method only reported the 43 * number of _active_ CPUs when the library was initialized, 44 * which could be less than the real total. 45 * 46 * NDK r5: Handle buggy kernels which report a CPU Architecture number of 7 47 * for an ARMv6 CPU (see below). 48 * 49 * Handle kernels that only report 'neon', and not 'vfpv3' 50 * (VFPv3 is mandated by the ARM architecture is Neon is implemented) 51 * 52 * Handle kernels that only report 'vfpv3d16', and not 'vfpv3' 53 * 54 * Fix x86 compilation. Report ANDROID_CPU_FAMILY_X86 in 55 * android_getCpuFamily(). 56 * 57 * NDK r4: Initial release 58 */ 59 60#if defined(__le32__) || defined(__le64__) 61 62// When users enter this, we should only provide interface and 63// libportable will give the implementations. 64 65#else // !__le32__ && !__le64__ 66 67#include <sys/system_properties.h> 68#include <pthread.h> 69#include "cpu-features.h" 70#include <stdio.h> 71#include <stdlib.h> 72#include <fcntl.h> 73#include <errno.h> 74 75static pthread_once_t g_once; 76static int g_inited; 77static AndroidCpuFamily g_cpuFamily; 78static uint64_t g_cpuFeatures; 79static int g_cpuCount; 80 81#ifdef __arm__ 82static uint32_t g_cpuIdArm; 83#endif 84 85static const int android_cpufeatures_debug = 0; 86 87#ifdef __arm__ 88# define DEFAULT_CPU_FAMILY ANDROID_CPU_FAMILY_ARM 89#elif defined __i386__ 90# define DEFAULT_CPU_FAMILY ANDROID_CPU_FAMILY_X86 91#else 92# define DEFAULT_CPU_FAMILY ANDROID_CPU_FAMILY_UNKNOWN 93#endif 94 95#define D(...) 
\ 96 do { \ 97 if (android_cpufeatures_debug) { \ 98 printf(__VA_ARGS__); fflush(stdout); \ 99 } \ 100 } while (0) 101 102#ifdef __i386__ 103static __inline__ void x86_cpuid(int func, int values[4]) 104{ 105 int a, b, c, d; 106 /* We need to preserve ebx since we're compiling PIC code */ 107 /* this means we can't use "=b" for the second output register */ 108 __asm__ __volatile__ ( \ 109 "push %%ebx\n" 110 "cpuid\n" \ 111 "mov %%ebx, %1\n" 112 "pop %%ebx\n" 113 : "=a" (a), "=r" (b), "=c" (c), "=d" (d) \ 114 : "a" (func) \ 115 ); 116 values[0] = a; 117 values[1] = b; 118 values[2] = c; 119 values[3] = d; 120} 121#endif 122 123/* Get the size of a file by reading it until the end. This is needed 124 * because files under /proc do not always return a valid size when 125 * using fseek(0, SEEK_END) + ftell(). Nor can they be mmap()-ed. 126 */ 127static int 128get_file_size(const char* pathname) 129{ 130 int fd, ret, result = 0; 131 char buffer[256]; 132 133 fd = open(pathname, O_RDONLY); 134 if (fd < 0) { 135 D("Can't open %s: %s\n", pathname, strerror(errno)); 136 return -1; 137 } 138 139 for (;;) { 140 int ret = read(fd, buffer, sizeof buffer); 141 if (ret < 0) { 142 if (errno == EINTR) 143 continue; 144 D("Error while reading %s: %s\n", pathname, strerror(errno)); 145 break; 146 } 147 if (ret == 0) 148 break; 149 150 result += ret; 151 } 152 close(fd); 153 return result; 154} 155 156/* Read the content of /proc/cpuinfo into a user-provided buffer. 157 * Return the length of the data, or -1 on error. Does *not* 158 * zero-terminate the content. Will not read more 159 * than 'buffsize' bytes. 
160 */ 161static int 162read_file(const char* pathname, char* buffer, size_t buffsize) 163{ 164 int fd, count; 165 166 fd = open(pathname, O_RDONLY); 167 if (fd < 0) { 168 D("Could not open %s: %s\n", pathname, strerror(errno)); 169 return -1; 170 } 171 count = 0; 172 while (count < (int)buffsize) { 173 int ret = read(fd, buffer + count, buffsize - count); 174 if (ret < 0) { 175 if (errno == EINTR) 176 continue; 177 D("Error while reading from %s: %s\n", pathname, strerror(errno)); 178 if (count == 0) 179 count = -1; 180 break; 181 } 182 if (ret == 0) 183 break; 184 count += ret; 185 } 186 close(fd); 187 return count; 188} 189 190/* Extract the content of a the first occurence of a given field in 191 * the content of /proc/cpuinfo and return it as a heap-allocated 192 * string that must be freed by the caller. 193 * 194 * Return NULL if not found 195 */ 196static char* 197extract_cpuinfo_field(const char* buffer, int buflen, const char* field) 198{ 199 int fieldlen = strlen(field); 200 const char* bufend = buffer + buflen; 201 char* result = NULL; 202 int len, ignore; 203 const char *p, *q; 204 205 /* Look for first field occurence, and ensures it starts the line. */ 206 p = buffer; 207 for (;;) { 208 p = memmem(p, bufend-p, field, fieldlen); 209 if (p == NULL) 210 goto EXIT; 211 212 if (p == buffer || p[-1] == '\n') 213 break; 214 215 p += fieldlen; 216 } 217 218 /* Skip to the first column followed by a space */ 219 p += fieldlen; 220 p = memchr(p, ':', bufend-p); 221 if (p == NULL || p[1] != ' ') 222 goto EXIT; 223 224 /* Find the end of the line */ 225 p += 2; 226 q = memchr(p, '\n', bufend-p); 227 if (q == NULL) 228 q = bufend; 229 230 /* Copy the line into a heap-allocated buffer */ 231 len = q-p; 232 result = malloc(len+1); 233 if (result == NULL) 234 goto EXIT; 235 236 memcpy(result, p, len); 237 result[len] = '\0'; 238 239EXIT: 240 return result; 241} 242 243/* Checks that a space-separated list of items contains one given 'item'. 
/* Checks that a space-separated list of items contains one given 'item'.
 * Items may be separated by spaces or tabs.
 * Returns 1 if found, 0 otherwise (including when 'list' is NULL).
 */
static int
has_list_item(const char* list, const char* item)
{
    const char*   p = list;
    const size_t  itemlen = strlen(item);

    if (list == NULL)
        return 0;

    while (*p) {
        const char*  q;

        /* skip spaces */
        while (*p == ' ' || *p == '\t')
            p++;

        /* find end of current list item */
        q = p;
        while (*q && *q != ' ' && *q != '\t')
            q++;

        if ((size_t)(q - p) == itemlen && !memcmp(p, item, itemlen))
            return 1;

        /* skip to next item */
        p = q;
    }
    return 0;
}

/* Parse a number starting from 'input', but not going further
 * than 'limit'. Return the value into '*result'.
 *
 * Digits 0-9, a-f and A-F are recognized; parsing stops at the first
 * character that is not a digit valid for 'base'.
 *
 * NOTE: Does not skip over leading spaces, or deal with sign characters.
 * NOTE: Ignores overflows. The value is accumulated in an unsigned
 *       integer so that too many digits wrap around instead of
 *       triggering signed-overflow undefined behaviour.
 *
 * The function returns NULL in case of error (bad format), or the new
 * position after the decimal number in case of success (which will always
 * be <= 'limit'). '*result' is left untouched on error.
 */
static const char*
parse_number(const char* input, const char* limit, int base, int* result)
{
    const char*  p = input;
    unsigned     val = 0;

    while (p < limit) {
        int  d = (*p - '0');
        if ((unsigned)d >= 10U) {
            /* Not a decimal digit: try lowercase, then uppercase hex. */
            d = (*p - 'a');
            if ((unsigned)d >= 6U)
                d = (*p - 'A');
            if ((unsigned)d >= 6U)
                break;
            d += 10;
        }
        if (d >= base)
            break;
        val = val*(unsigned)base + (unsigned)d;
        p++;
    }
    if (p == input)
        return NULL;

    *result = (int)val;
    return p;
}

/* Parse a base-10 number; see parse_number(). */
static const char*
parse_decimal(const char* input, const char* limit, int* result)
{
    return parse_number(input, limit, 10, result);
}

/* Parse a base-16 number (no "0x" prefix handling); see parse_number(). */
static const char*
parse_hexadecimal(const char* input, const char* limit, int* result)
{
    return parse_number(input, limit, 16, result);
}

/* This small data type is used to represent a CPU list / mask, as read
 * from sysfs on Linux. */
See http://www.kernel.org/doc/Documentation/cputopology.txt 327 * 328 * For now, we don't expect more than 32 cores on mobile devices, so keep 329 * everything simple. 330 */ 331typedef struct { 332 uint32_t mask; 333} CpuList; 334 335static __inline__ void 336cpulist_init(CpuList* list) { 337 list->mask = 0; 338} 339 340static __inline__ void 341cpulist_and(CpuList* list1, CpuList* list2) { 342 list1->mask &= list2->mask; 343} 344 345static __inline__ void 346cpulist_set(CpuList* list, int index) { 347 if ((unsigned)index < 32) { 348 list->mask |= (uint32_t)(1U << index); 349 } 350} 351 352static __inline__ int 353cpulist_count(CpuList* list) { 354 return __builtin_popcount(list->mask); 355} 356 357/* Parse a textual list of cpus and store the result inside a CpuList object. 358 * Input format is the following: 359 * - comma-separated list of items (no spaces) 360 * - each item is either a single decimal number (cpu index), or a range made 361 * of two numbers separated by a single dash (-). Ranges are inclusive. 362 * 363 * Examples: 0 364 * 2,4-127,128-143 365 * 0-1 366 */ 367static void 368cpulist_parse(CpuList* list, const char* line, int line_len) 369{ 370 const char* p = line; 371 const char* end = p + line_len; 372 const char* q; 373 374 /* NOTE: the input line coming from sysfs typically contains a 375 * trailing newline, so take care of it in the code below 376 */ 377 while (p < end && *p != '\n') 378 { 379 int val, start_value, end_value; 380 381 /* Find the end of current item, and put it into 'q' */ 382 q = memchr(p, ',', end-p); 383 if (q == NULL) { 384 q = end; 385 } 386 387 /* Get first value */ 388 p = parse_decimal(p, q, &start_value); 389 if (p == NULL) 390 goto BAD_FORMAT; 391 392 end_value = start_value; 393 394 /* If we're not at the end of the item, expect a dash and 395 * and integer; extract end value. 
396 */ 397 if (p < q && *p == '-') { 398 p = parse_decimal(p+1, q, &end_value); 399 if (p == NULL) 400 goto BAD_FORMAT; 401 } 402 403 /* Set bits CPU list bits */ 404 for (val = start_value; val <= end_value; val++) { 405 cpulist_set(list, val); 406 } 407 408 /* Jump to next item */ 409 p = q; 410 if (p < end) 411 p++; 412 } 413 414BAD_FORMAT: 415 ; 416} 417 418/* Read a CPU list from one sysfs file */ 419static void 420cpulist_read_from(CpuList* list, const char* filename) 421{ 422 char file[64]; 423 int filelen; 424 425 cpulist_init(list); 426 427 filelen = read_file(filename, file, sizeof file); 428 if (filelen < 0) { 429 D("Could not read %s: %s\n", filename, strerror(errno)); 430 return; 431 } 432 433 cpulist_parse(list, file, filelen); 434} 435 436// See <asm/hwcap.h> kernel header. 437#define HWCAP_VFP (1 << 6) 438#define HWCAP_IWMMXT (1 << 9) 439#define HWCAP_NEON (1 << 12) 440#define HWCAP_VFPv3 (1 << 13) 441#define HWCAP_VFPv3D16 (1 << 14) 442#define HWCAP_VFPv4 (1 << 16) 443#define HWCAP_IDIVA (1 << 17) 444#define HWCAP_IDIVT (1 << 18) 445 446#define AT_HWCAP 16 447 448#if defined(__arm__) 449/* Compute the ELF HWCAP flags. 450 */ 451static uint32_t 452get_elf_hwcap(const char* cpuinfo, int cpuinfo_len) 453{ 454 /* IMPORTANT: 455 * Accessing /proc/self/auxv doesn't work anymore on all 456 * platform versions. More specifically, when running inside 457 * a regular application process, most of /proc/self/ will be 458 * non-readable, including /proc/self/auxv. This doesn't 459 * happen however if the application is debuggable, or when 460 * running under the "shell" UID, which is why this was not 461 * detected appropriately. 
462 */ 463#if 0 464 uint32_t result = 0; 465 const char filepath[] = "/proc/self/auxv"; 466 int fd = open(filepath, O_RDONLY); 467 if (fd < 0) { 468 D("Could not open %s: %s\n", filepath, strerror(errno)); 469 return 0; 470 } 471 472 struct { uint32_t tag; uint32_t value; } entry; 473 474 for (;;) { 475 int ret = read(fd, (char*)&entry, sizeof entry); 476 if (ret < 0) { 477 if (errno == EINTR) 478 continue; 479 D("Error while reading %s: %s\n", filepath, strerror(errno)); 480 break; 481 } 482 // Detect end of list. 483 if (ret == 0 || (entry.tag == 0 && entry.value == 0)) 484 break; 485 if (entry.tag == AT_HWCAP) { 486 result = entry.value; 487 break; 488 } 489 } 490 close(fd); 491 return result; 492#else 493 // Recreate ELF hwcaps by parsing /proc/cpuinfo Features tag. 494 uint32_t hwcaps = 0; 495 496 char* cpuFeatures = extract_cpuinfo_field(cpuinfo, cpuinfo_len, "Features"); 497 498 if (cpuFeatures != NULL) { 499 D("Found cpuFeatures = '%s'\n", cpuFeatures); 500 501 if (has_list_item(cpuFeatures, "vfp")) 502 hwcaps |= HWCAP_VFP; 503 if (has_list_item(cpuFeatures, "vfpv3")) 504 hwcaps |= HWCAP_VFPv3; 505 if (has_list_item(cpuFeatures, "vfpv3d16")) 506 hwcaps |= HWCAP_VFPv3D16; 507 if (has_list_item(cpuFeatures, "vfpv4")) 508 hwcaps |= HWCAP_VFPv4; 509 if (has_list_item(cpuFeatures, "neon")) 510 hwcaps |= HWCAP_NEON; 511 if (has_list_item(cpuFeatures, "idiva")) 512 hwcaps |= HWCAP_IDIVA; 513 if (has_list_item(cpuFeatures, "idivt")) 514 hwcaps |= HWCAP_IDIVT; 515 if (has_list_item(cpuFeatures, "idiv")) 516 hwcaps |= HWCAP_IDIVA | HWCAP_IDIVT; 517 if (has_list_item(cpuFeatures, "iwmmxt")) 518 hwcaps |= HWCAP_IWMMXT; 519 520 free(cpuFeatures); 521 } 522 return hwcaps; 523#endif 524} 525#endif /* __arm__ */ 526 527/* Return the number of cpus present on a given device. 528 * 529 * To handle all weird kernel configurations, we need to compute the 530 * intersection of the 'present' and 'possible' CPU lists and count 531 * the result. 
532 */ 533static int 534get_cpu_count(void) 535{ 536 CpuList cpus_present[1]; 537 CpuList cpus_possible[1]; 538 539 cpulist_read_from(cpus_present, "/sys/devices/system/cpu/present"); 540 cpulist_read_from(cpus_possible, "/sys/devices/system/cpu/possible"); 541 542 /* Compute the intersection of both sets to get the actual number of 543 * CPU cores that can be used on this device by the kernel. 544 */ 545 cpulist_and(cpus_present, cpus_possible); 546 547 return cpulist_count(cpus_present); 548} 549 550static void 551android_cpuInitFamily(void) 552{ 553#if defined(__arm__) 554 g_cpuFamily = ANDROID_CPU_FAMILY_ARM; 555#elif defined(__i386__) 556 g_cpuFamily = ANDROID_CPU_FAMILY_X86; 557#elif defined(__mips64) 558/* Needs to be before __mips__ since the compiler defines both */ 559 g_cpuFamily = ANDROID_CPU_FAMILY_MIPS64; 560#elif defined(__mips__) 561 g_cpuFamily = ANDROID_CPU_FAMILY_MIPS; 562#elif defined(__aarch64__) 563 g_cpuFamily = ANDROID_CPU_FAMILY_ARM64; 564#elif defined(__x86_64__) 565 g_cpuFamily = ANDROID_CPU_FAMILY_X86_64; 566#else 567 g_cpuFamily = ANDROID_CPU_FAMILY_UNKNOWN; 568#endif 569} 570 571static void 572android_cpuInit(void) 573{ 574 char* cpuinfo = NULL; 575 int cpuinfo_len; 576 577 android_cpuInitFamily(); 578 579 g_cpuFeatures = 0; 580 g_cpuCount = 1; 581 g_inited = 1; 582 583 cpuinfo_len = get_file_size("/proc/cpuinfo"); 584 if (cpuinfo_len < 0) { 585 D("cpuinfo_len cannot be computed!"); 586 return; 587 } 588 cpuinfo = malloc(cpuinfo_len); 589 if (cpuinfo == NULL) { 590 D("cpuinfo buffer could not be allocated"); 591 return; 592 } 593 cpuinfo_len = read_file("/proc/cpuinfo", cpuinfo, cpuinfo_len); 594 D("cpuinfo_len is (%d):\n%.*s\n", cpuinfo_len, 595 cpuinfo_len >= 0 ? 
cpuinfo_len : 0, cpuinfo); 596 597 if (cpuinfo_len < 0) /* should not happen */ { 598 free(cpuinfo); 599 return; 600 } 601 602 /* Count the CPU cores, the value may be 0 for single-core CPUs */ 603 g_cpuCount = get_cpu_count(); 604 if (g_cpuCount == 0) { 605 g_cpuCount = 1; 606 } 607 608 D("found cpuCount = %d\n", g_cpuCount); 609 610#ifdef __arm__ 611 { 612 char* features = NULL; 613 char* architecture = NULL; 614 615 /* Extract architecture from the "CPU Architecture" field. 616 * The list is well-known, unlike the the output of 617 * the 'Processor' field which can vary greatly. 618 * 619 * See the definition of the 'proc_arch' array in 620 * $KERNEL/arch/arm/kernel/setup.c and the 'c_show' function in 621 * same file. 622 */ 623 char* cpuArch = extract_cpuinfo_field(cpuinfo, cpuinfo_len, "CPU architecture"); 624 625 if (cpuArch != NULL) { 626 char* end; 627 long archNumber; 628 int hasARMv7 = 0; 629 630 D("found cpuArch = '%s'\n", cpuArch); 631 632 /* read the initial decimal number, ignore the rest */ 633 archNumber = strtol(cpuArch, &end, 10); 634 635 /* Here we assume that ARMv8 will be upwards compatible with v7 636 * in the future. Unfortunately, there is no 'Features' field to 637 * indicate that Thumb-2 is supported. 638 */ 639 if (end > cpuArch && archNumber >= 7) { 640 hasARMv7 = 1; 641 } 642 643 /* Unfortunately, it seems that certain ARMv6-based CPUs 644 * report an incorrect architecture number of 7! 645 * 646 * See http://code.google.com/p/android/issues/detail?id=10812 647 * 648 * We try to correct this by looking at the 'elf_format' 649 * field reported by the 'Processor' field, which is of the 650 * form of "(v7l)" for an ARMv7-based CPU, and "(v6l)" for 651 * an ARMv6-one. 
652 */ 653 if (hasARMv7) { 654 char* cpuProc = extract_cpuinfo_field(cpuinfo, cpuinfo_len, 655 "Processor"); 656 if (cpuProc != NULL) { 657 D("found cpuProc = '%s'\n", cpuProc); 658 if (has_list_item(cpuProc, "(v6l)")) { 659 D("CPU processor and architecture mismatch!!\n"); 660 hasARMv7 = 0; 661 } 662 free(cpuProc); 663 } 664 } 665 666 if (hasARMv7) { 667 g_cpuFeatures |= ANDROID_CPU_ARM_FEATURE_ARMv7; 668 } 669 670 /* The LDREX / STREX instructions are available from ARMv6 */ 671 if (archNumber >= 6) { 672 g_cpuFeatures |= ANDROID_CPU_ARM_FEATURE_LDREX_STREX; 673 } 674 675 free(cpuArch); 676 } 677 678 /* Extract the list of CPU features from ELF hwcaps */ 679 uint32_t hwcaps = get_elf_hwcap(cpuinfo, cpuinfo_len); 680 681 if (hwcaps != 0) { 682 int has_vfp = (hwcaps & HWCAP_VFP); 683 int has_vfpv3 = (hwcaps & HWCAP_VFPv3); 684 int has_vfpv3d16 = (hwcaps & HWCAP_VFPv3D16); 685 int has_vfpv4 = (hwcaps & HWCAP_VFPv4); 686 int has_neon = (hwcaps & HWCAP_NEON); 687 int has_idiva = (hwcaps & HWCAP_IDIVA); 688 int has_idivt = (hwcaps & HWCAP_IDIVT); 689 int has_iwmmxt = (hwcaps & HWCAP_IWMMXT); 690 691 // The kernel does a poor job at ensuring consistency when 692 // describing CPU features. So lots of guessing is needed. 693 694 // 'vfpv4' implies VFPv3|VFP_FMA|FP16 695 if (has_vfpv4) 696 g_cpuFeatures |= ANDROID_CPU_ARM_FEATURE_VFPv3 | 697 ANDROID_CPU_ARM_FEATURE_VFP_FP16 | 698 ANDROID_CPU_ARM_FEATURE_VFP_FMA; 699 700 // 'vfpv3' or 'vfpv3d16' imply VFPv3. Note that unlike GCC, 701 // a value of 'vfpv3' doesn't necessarily mean that the D32 702 // feature is present, so be conservative. All CPUs in the 703 // field that support D32 also support NEON, so this should 704 // not be a problem in practice. 705 if (has_vfpv3 || has_vfpv3d16) 706 g_cpuFeatures |= ANDROID_CPU_ARM_FEATURE_VFPv3; 707 708 // 'vfp' is super ambiguous. Depending on the kernel, it can 709 // either mean VFPv2 or VFPv3. Make it depend on ARMv7. 
710 if (has_vfp) { 711 if (g_cpuFeatures & ANDROID_CPU_ARM_FEATURE_ARMv7) 712 g_cpuFeatures |= ANDROID_CPU_ARM_FEATURE_VFPv3; 713 else 714 g_cpuFeatures |= ANDROID_CPU_ARM_FEATURE_VFPv2; 715 } 716 717 // Neon implies VFPv3|D32, and if vfpv4 is detected, NEON_FMA 718 if (has_neon) { 719 g_cpuFeatures |= ANDROID_CPU_ARM_FEATURE_VFPv3 | 720 ANDROID_CPU_ARM_FEATURE_NEON | 721 ANDROID_CPU_ARM_FEATURE_VFP_D32; 722 if (has_vfpv4) 723 g_cpuFeatures |= ANDROID_CPU_ARM_FEATURE_NEON_FMA; 724 } 725 726 // VFPv3 implies VFPv2 and ARMv7 727 if (g_cpuFeatures & ANDROID_CPU_ARM_FEATURE_VFPv3) 728 g_cpuFeatures |= ANDROID_CPU_ARM_FEATURE_VFPv2 | 729 ANDROID_CPU_ARM_FEATURE_ARMv7; 730 731 if (has_idiva) 732 g_cpuFeatures |= ANDROID_CPU_ARM_FEATURE_IDIV_ARM; 733 if (has_idivt) 734 g_cpuFeatures |= ANDROID_CPU_ARM_FEATURE_IDIV_THUMB2; 735 736 if (has_iwmmxt) 737 g_cpuFeatures |= ANDROID_CPU_ARM_FEATURE_iWMMXt; 738 } 739 740 /* Extract the cpuid value from various fields */ 741 // The CPUID value is broken up in several entries in /proc/cpuinfo. 742 // This table is used to rebuild it from the entries. 
743 static const struct CpuIdEntry { 744 const char* field; 745 char format; 746 char bit_lshift; 747 char bit_length; 748 } cpu_id_entries[] = { 749 { "CPU implementer", 'x', 24, 8 }, 750 { "CPU variant", 'x', 20, 4 }, 751 { "CPU part", 'x', 4, 12 }, 752 { "CPU revision", 'd', 0, 4 }, 753 }; 754 size_t i; 755 D("Parsing /proc/cpuinfo to recover CPUID\n"); 756 for (i = 0; 757 i < sizeof(cpu_id_entries)/sizeof(cpu_id_entries[0]); 758 ++i) { 759 const struct CpuIdEntry* entry = &cpu_id_entries[i]; 760 char* value = extract_cpuinfo_field(cpuinfo, 761 cpuinfo_len, 762 entry->field); 763 if (value == NULL) 764 continue; 765 766 D("field=%s value='%s'\n", entry->field, value); 767 char* value_end = value + strlen(value); 768 int val = 0; 769 const char* start = value; 770 const char* p; 771 if (value[0] == '0' && (value[1] == 'x' || value[1] == 'X')) { 772 start += 2; 773 p = parse_hexadecimal(start, value_end, &val); 774 } else if (entry->format == 'x') 775 p = parse_hexadecimal(value, value_end, &val); 776 else 777 p = parse_decimal(value, value_end, &val); 778 779 if (p > (const char*)start) { 780 val &= ((1 << entry->bit_length)-1); 781 val <<= entry->bit_lshift; 782 g_cpuIdArm |= (uint32_t) val; 783 } 784 785 free(value); 786 } 787 788 // Handle kernel configuration bugs that prevent the correct 789 // reporting of CPU features. 790 static const struct CpuFix { 791 uint32_t cpuid; 792 uint64_t or_flags; 793 } cpu_fixes[] = { 794 /* The Nexus 4 (Qualcomm Krait) kernel configuration 795 * forgets to report IDIV support. 
*/ 796 { 0x510006f2, ANDROID_CPU_ARM_FEATURE_IDIV_ARM | 797 ANDROID_CPU_ARM_FEATURE_IDIV_THUMB2 }, 798 }; 799 size_t n; 800 for (n = 0; n < sizeof(cpu_fixes)/sizeof(cpu_fixes[0]); ++n) { 801 const struct CpuFix* entry = &cpu_fixes[n]; 802 803 if (g_cpuIdArm == entry->cpuid) 804 g_cpuFeatures |= entry->or_flags; 805 } 806 807 } 808#endif /* __arm__ */ 809 810#ifdef __i386__ 811 int regs[4]; 812 813/* According to http://en.wikipedia.org/wiki/CPUID */ 814#define VENDOR_INTEL_b 0x756e6547 815#define VENDOR_INTEL_c 0x6c65746e 816#define VENDOR_INTEL_d 0x49656e69 817 818 x86_cpuid(0, regs); 819 int vendorIsIntel = (regs[1] == VENDOR_INTEL_b && 820 regs[2] == VENDOR_INTEL_c && 821 regs[3] == VENDOR_INTEL_d); 822 823 x86_cpuid(1, regs); 824 if ((regs[2] & (1 << 9)) != 0) { 825 g_cpuFeatures |= ANDROID_CPU_X86_FEATURE_SSSE3; 826 } 827 if ((regs[2] & (1 << 23)) != 0) { 828 g_cpuFeatures |= ANDROID_CPU_X86_FEATURE_POPCNT; 829 } 830 if (vendorIsIntel && (regs[2] & (1 << 22)) != 0) { 831 g_cpuFeatures |= ANDROID_CPU_X86_FEATURE_MOVBE; 832 } 833#endif 834 835 free(cpuinfo); 836} 837 838 839AndroidCpuFamily 840android_getCpuFamily(void) 841{ 842 pthread_once(&g_once, android_cpuInit); 843 return g_cpuFamily; 844} 845 846 847uint64_t 848android_getCpuFeatures(void) 849{ 850 pthread_once(&g_once, android_cpuInit); 851 return g_cpuFeatures; 852} 853 854 855int 856android_getCpuCount(void) 857{ 858 pthread_once(&g_once, android_cpuInit); 859 return g_cpuCount; 860} 861 862static void 863android_cpuInitDummy(void) 864{ 865 g_inited = 1; 866} 867 868int 869android_setCpu(int cpu_count, uint64_t cpu_features) 870{ 871 /* Fail if the library was already initialized. */ 872 if (g_inited) 873 return 0; 874 875 android_cpuInitFamily(); 876 g_cpuCount = (cpu_count <= 0 ? 
1 : cpu_count); 877 g_cpuFeatures = cpu_features; 878 pthread_once(&g_once, android_cpuInitDummy); 879 880 return 1; 881} 882 883#ifdef __arm__ 884uint32_t 885android_getCpuIdArm(void) 886{ 887 pthread_once(&g_once, android_cpuInit); 888 return g_cpuIdArm; 889} 890 891int 892android_setCpuArm(int cpu_count, uint64_t cpu_features, uint32_t cpu_id) 893{ 894 if (!android_setCpu(cpu_count, cpu_features)) 895 return 0; 896 897 g_cpuIdArm = cpu_id; 898 return 1; 899} 900#endif /* __arm__ */ 901 902/* 903 * Technical note: Making sense of ARM's FPU architecture versions. 904 * 905 * FPA was ARM's first attempt at an FPU architecture. There is no Android 906 * device that actually uses it since this technology was already obsolete 907 * when the project started. If you see references to FPA instructions 908 * somewhere, you can be sure that this doesn't apply to Android at all. 909 * 910 * FPA was followed by "VFP", soon renamed "VFPv1" due to the emergence of 911 * new versions / additions to it. ARM considers this obsolete right now, 912 * and no known Android device implements it either. 913 * 914 * VFPv2 added a few instructions to VFPv1, and is an *optional* extension 915 * supported by some ARMv5TE, ARMv6 and ARMv6T2 CPUs. Note that a device 916 * supporting the 'armeabi' ABI doesn't necessarily support these. 917 * 918 * VFPv3-D16 adds a few instructions on top of VFPv2 and is typically used 919 * on ARMv7-A CPUs which implement a FPU. Note that it is also mandated 920 * by the Android 'armeabi-v7a' ABI. The -D16 suffix in its name means 921 * that it provides 16 double-precision FPU registers (d0-d15) and 32 922 * single-precision ones (s0-s31) which happen to be mapped to the same 923 * register banks. 924 * 925 * VFPv3-D32 is the name of an extension to VFPv3-D16 that provides 16 926 * additional double precision registers (d16-d31). Note that there are 927 * still only 32 single precision registers. 
 *
 * VFPv3xD is a *subset* of VFPv3-D16 that only provides single-precision
 * registers. It is only used on ARMv7-M (i.e. on micro-controllers) which
 * are not supported by Android. Note that it is not compatible with VFPv2.
 *
 * NOTE: The term 'VFPv3' usually designates either VFPv3-D16 or VFPv3-D32
 *       depending on context. For example GCC uses it for VFPv3-D32, but
 *       the Linux kernel code uses it for VFPv3-D16 (especially in
 *       /proc/cpuinfo). Always try to use the full designation when
 *       possible.
 *
 * NEON, a.k.a. "ARM Advanced SIMD" is an extension that provides
 * instructions to perform parallel computations on vectors of 8, 16,
 * 32, 64 and 128 bit quantities. NEON requires VFPv3-D32 since all
 * NEON registers are also mapped to the same register banks.
 *
 * VFPv4-D16, adds a few instructions on top of VFPv3-D16 in order to
 * perform fused multiply-accumulate on VFP registers, as well as
 * half-precision (16-bit) conversion operations.
 *
 * VFPv4-D32 is VFPv4-D16 with 32, instead of 16, FPU double precision
 * registers.
 *
 * VFPv4-NEON is VFPv4-D32 with NEON instructions. It also adds fused
 * multiply-accumulate instructions that work on the NEON registers.
 *
 * NOTE: Similarly, "VFPv4" might either reference VFPv4-D16 or VFPv4-D32
 *       depending on context.
 *
 * The following information was determined by scanning the binutils-2.22
 * sources:
 *
 * Basic VFP instruction subsets:
 *
 * #define FPU_VFP_EXT_V1xD 0x08000000     // Base VFP instruction set.
 * #define FPU_VFP_EXT_V1   0x04000000     // Double-precision insns.
 * #define FPU_VFP_EXT_V2   0x02000000     // ARM10E VFPr1.
 * #define FPU_VFP_EXT_V3xD 0x01000000     // VFPv3 single-precision.
 * #define FPU_VFP_EXT_V3   0x00800000     // VFPv3 double-precision.
 * #define FPU_NEON_EXT_V1  0x00400000     // Neon (SIMD) insns.
 * #define FPU_VFP_EXT_D32  0x00200000     // Registers D16-D31.
969 * #define FPU_VFP_EXT_FP16 0x00100000 // Half-precision extensions. 970 * #define FPU_NEON_EXT_FMA 0x00080000 // Neon fused multiply-add 971 * #define FPU_VFP_EXT_FMA 0x00040000 // VFP fused multiply-add 972 * 973 * FPU types (excluding NEON) 974 * 975 * FPU_VFP_V1xD (EXT_V1xD) 976 * | 977 * +--------------------------+ 978 * | | 979 * FPU_VFP_V1 (+EXT_V1) FPU_VFP_V3xD (+EXT_V2+EXT_V3xD) 980 * | | 981 * | | 982 * FPU_VFP_V2 (+EXT_V2) FPU_VFP_V4_SP_D16 (+EXT_FP16+EXT_FMA) 983 * | 984 * FPU_VFP_V3D16 (+EXT_Vx3D+EXT_V3) 985 * | 986 * +--------------------------+ 987 * | | 988 * FPU_VFP_V3 (+EXT_D32) FPU_VFP_V4D16 (+EXT_FP16+EXT_FMA) 989 * | | 990 * | FPU_VFP_V4 (+EXT_D32) 991 * | 992 * FPU_VFP_HARD (+EXT_FMA+NEON_EXT_FMA) 993 * 994 * VFP architectures: 995 * 996 * ARCH_VFP_V1xD (EXT_V1xD) 997 * | 998 * +------------------+ 999 * | | 1000 * | ARCH_VFP_V3xD (+EXT_V2+EXT_V3xD) 1001 * | | 1002 * | ARCH_VFP_V3xD_FP16 (+EXT_FP16) 1003 * | | 1004 * | ARCH_VFP_V4_SP_D16 (+EXT_FMA) 1005 * | 1006 * ARCH_VFP_V1 (+EXT_V1) 1007 * | 1008 * ARCH_VFP_V2 (+EXT_V2) 1009 * | 1010 * ARCH_VFP_V3D16 (+EXT_V3xD+EXT_V3) 1011 * | 1012 * +-------------------+ 1013 * | | 1014 * | ARCH_VFP_V3D16_FP16 (+EXT_FP16) 1015 * | 1016 * +-------------------+ 1017 * | | 1018 * | ARCH_VFP_V4_D16 (+EXT_FP16+EXT_FMA) 1019 * | | 1020 * | ARCH_VFP_V4 (+EXT_D32) 1021 * | | 1022 * | ARCH_NEON_VFP_V4 (+EXT_NEON+EXT_NEON_FMA) 1023 * | 1024 * ARCH_VFP_V3 (+EXT_D32) 1025 * | 1026 * +-------------------+ 1027 * | | 1028 * | ARCH_VFP_V3_FP16 (+EXT_FP16) 1029 * | 1030 * ARCH_VFP_V3_PLUS_NEON_V1 (+EXT_NEON) 1031 * | 1032 * ARCH_NEON_FP16 (+EXT_FP16) 1033 * 1034 * -fpu=<name> values and their correspondance with FPU architectures above: 1035 * 1036 * {"vfp", FPU_ARCH_VFP_V2}, 1037 * {"vfp9", FPU_ARCH_VFP_V2}, 1038 * {"vfp3", FPU_ARCH_VFP_V3}, // For backwards compatbility. 
1039 * {"vfp10", FPU_ARCH_VFP_V2}, 1040 * {"vfp10-r0", FPU_ARCH_VFP_V1}, 1041 * {"vfpxd", FPU_ARCH_VFP_V1xD}, 1042 * {"vfpv2", FPU_ARCH_VFP_V2}, 1043 * {"vfpv3", FPU_ARCH_VFP_V3}, 1044 * {"vfpv3-fp16", FPU_ARCH_VFP_V3_FP16}, 1045 * {"vfpv3-d16", FPU_ARCH_VFP_V3D16}, 1046 * {"vfpv3-d16-fp16", FPU_ARCH_VFP_V3D16_FP16}, 1047 * {"vfpv3xd", FPU_ARCH_VFP_V3xD}, 1048 * {"vfpv3xd-fp16", FPU_ARCH_VFP_V3xD_FP16}, 1049 * {"neon", FPU_ARCH_VFP_V3_PLUS_NEON_V1}, 1050 * {"neon-fp16", FPU_ARCH_NEON_FP16}, 1051 * {"vfpv4", FPU_ARCH_VFP_V4}, 1052 * {"vfpv4-d16", FPU_ARCH_VFP_V4D16}, 1053 * {"fpv4-sp-d16", FPU_ARCH_VFP_V4_SP_D16}, 1054 * {"neon-vfpv4", FPU_ARCH_NEON_VFP_V4}, 1055 * 1056 * 1057 * Simplified diagram that only includes FPUs supported by Android: 1058 * Only ARCH_VFP_V3D16 is actually mandated by the armeabi-v7a ABI, 1059 * all others are optional and must be probed at runtime. 1060 * 1061 * ARCH_VFP_V3D16 (EXT_V1xD+EXT_V1+EXT_V2+EXT_V3xD+EXT_V3) 1062 * | 1063 * +-------------------+ 1064 * | | 1065 * | ARCH_VFP_V3D16_FP16 (+EXT_FP16) 1066 * | 1067 * +-------------------+ 1068 * | | 1069 * | ARCH_VFP_V4_D16 (+EXT_FP16+EXT_FMA) 1070 * | | 1071 * | ARCH_VFP_V4 (+EXT_D32) 1072 * | | 1073 * | ARCH_NEON_VFP_V4 (+EXT_NEON+EXT_NEON_FMA) 1074 * | 1075 * ARCH_VFP_V3 (+EXT_D32) 1076 * | 1077 * +-------------------+ 1078 * | | 1079 * | ARCH_VFP_V3_FP16 (+EXT_FP16) 1080 * | 1081 * ARCH_VFP_V3_PLUS_NEON_V1 (+EXT_NEON) 1082 * | 1083 * ARCH_NEON_FP16 (+EXT_FP16) 1084 * 1085 */ 1086 1087#endif // defined(__le32__) || defined(__le64__) 1088