cpu-features.c revision 7f26cc56c2345217c63b88283be03aa549dd71a6
1/* 2 * Copyright (C) 2010 The Android Open Source Project 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * * Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * * Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in 12 * the documentation and/or other materials provided with the 13 * distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 16 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 17 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 18 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 19 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 20 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 21 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS 22 * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 23 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 24 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 25 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26 * SUCH DAMAGE. 27 */ 28 29/* ChangeLog for this library: 30 * 31 * NDK r8d: Add android_setCpu(). 32 * 33 * NDK r8c: Add new ARM CPU features: VFPv2, VFP_D32, VFP_FP16, 34 * VFP_FMA, NEON_FMA, IDIV_ARM, IDIV_THUMB2 and iWMMXt. 35 * 36 * Rewrite the code to parse /proc/self/auxv instead of 37 * the "Features" field in /proc/cpuinfo. 38 * 39 * Dynamically allocate the buffer that hold the content 40 * of /proc/cpuinfo to deal with newer hardware. 41 * 42 * NDK r7c: Fix CPU count computation. The old method only reported the 43 * number of _active_ CPUs when the library was initialized, 44 * which could be less than the real total. 45 * 46 * NDK r5: Handle buggy kernels which report a CPU Architecture number of 7 47 * for an ARMv6 CPU (see below). 48 * 49 * Handle kernels that only report 'neon', and not 'vfpv3' 50 * (VFPv3 is mandated by the ARM architecture is Neon is implemented) 51 * 52 * Handle kernels that only report 'vfpv3d16', and not 'vfpv3' 53 * 54 * Fix x86 compilation. Report ANDROID_CPU_FAMILY_X86 in 55 * android_getCpuFamily(). 56 * 57 * NDK r4: Initial release 58 */ 59#include <sys/system_properties.h> 60#ifdef __arm__ 61#include <machine/cpu-features.h> 62#endif 63#include <pthread.h> 64#include "cpu-features.h" 65#include <stdio.h> 66#include <stdlib.h> 67#include <fcntl.h> 68#include <errno.h> 69 70static pthread_once_t g_once; 71static int g_inited; 72static AndroidCpuFamily g_cpuFamily; 73static uint64_t g_cpuFeatures; 74static int g_cpuCount; 75 76#ifdef __arm__ 77static uint32_t g_cpuIdArm; 78#endif 79 80static const int android_cpufeatures_debug = 0; 81 82#ifdef __arm__ 83# define DEFAULT_CPU_FAMILY ANDROID_CPU_FAMILY_ARM 84#elif defined __i386__ 85# define DEFAULT_CPU_FAMILY ANDROID_CPU_FAMILY_X86 86#else 87# define DEFAULT_CPU_FAMILY ANDROID_CPU_FAMILY_UNKNOWN 88#endif 89 90#define D(...) \ 91 do { \ 92 if (android_cpufeatures_debug) { \ 93 printf(__VA_ARGS__); fflush(stdout); \ 94 } \ 95 } while (0) 96 97#ifdef __i386__ 98static __inline__ void x86_cpuid(int func, int values[4]) 99{ 100 int a, b, c, d; 101 /* We need to preserve ebx since we're compiling PIC code */ 102 /* this means we can't use "=b" for the second output register */ 103 __asm__ __volatile__ ( \ 104 "push %%ebx\n" 105 "cpuid\n" \ 106 "mov %%ebx, %1\n" 107 "pop %%ebx\n" 108 : "=a" (a), "=r" (b), "=c" (c), "=d" (d) \ 109 : "a" (func) \ 110 ); 111 values[0] = a; 112 values[1] = b; 113 values[2] = c; 114 values[3] = d; 115} 116#endif 117 118/* Get the size of a file by reading it until the end. This is needed 119 * because files under /proc do not always return a valid size when 120 * using fseek(0, SEEK_END) + ftell(). Nor can they be mmap()-ed. 121 */ 122static int 123get_file_size(const char* pathname) 124{ 125 int fd, ret, result = 0; 126 char buffer[256]; 127 128 fd = open(pathname, O_RDONLY); 129 if (fd < 0) { 130 D("Can't open %s: %s\n", pathname, strerror(errno)); 131 return -1; 132 } 133 134 for (;;) { 135 int ret = read(fd, buffer, sizeof buffer); 136 if (ret < 0) { 137 if (errno == EINTR) 138 continue; 139 D("Error while reading %s: %s\n", pathname, strerror(errno)); 140 break; 141 } 142 if (ret == 0) 143 break; 144 145 result += ret; 146 } 147 close(fd); 148 return result; 149} 150 151/* Read the content of /proc/cpuinfo into a user-provided buffer. 152 * Return the length of the data, or -1 on error. Does *not* 153 * zero-terminate the content. Will not read more 154 * than 'buffsize' bytes. 155 */ 156static int 157read_file(const char* pathname, char* buffer, size_t buffsize) 158{ 159 int fd, count; 160 161 fd = open(pathname, O_RDONLY); 162 if (fd < 0) { 163 D("Could not open %s: %s\n", pathname, strerror(errno)); 164 return -1; 165 } 166 count = 0; 167 while (count < (int)buffsize) { 168 int ret = read(fd, buffer + count, buffsize - count); 169 if (ret < 0) { 170 if (errno == EINTR) 171 continue; 172 D("Error while reading from %s: %s\n", pathname, strerror(errno)); 173 if (count == 0) 174 count = -1; 175 break; 176 } 177 if (ret == 0) 178 break; 179 count += ret; 180 } 181 close(fd); 182 return count; 183} 184 185/* Extract the content of a the first occurence of a given field in 186 * the content of /proc/cpuinfo and return it as a heap-allocated 187 * string that must be freed by the caller. 188 * 189 * Return NULL if not found 190 */ 191static char* 192extract_cpuinfo_field(const char* buffer, int buflen, const char* field) 193{ 194 int fieldlen = strlen(field); 195 const char* bufend = buffer + buflen; 196 char* result = NULL; 197 int len, ignore; 198 const char *p, *q; 199 200 /* Look for first field occurence, and ensures it starts the line. */ 201 p = buffer; 202 for (;;) { 203 p = memmem(p, bufend-p, field, fieldlen); 204 if (p == NULL) 205 goto EXIT; 206 207 if (p == buffer || p[-1] == '\n') 208 break; 209 210 p += fieldlen; 211 } 212 213 /* Skip to the first column followed by a space */ 214 p += fieldlen; 215 p = memchr(p, ':', bufend-p); 216 if (p == NULL || p[1] != ' ') 217 goto EXIT; 218 219 /* Find the end of the line */ 220 p += 2; 221 q = memchr(p, '\n', bufend-p); 222 if (q == NULL) 223 q = bufend; 224 225 /* Copy the line into a heap-allocated buffer */ 226 len = q-p; 227 result = malloc(len+1); 228 if (result == NULL) 229 goto EXIT; 230 231 memcpy(result, p, len); 232 result[len] = '\0'; 233 234EXIT: 235 return result; 236} 237 238/* Like strlen(), but for constant string literals */ 239#define STRLEN_CONST(x) ((sizeof(x)-1) 240 241 242/* Checks that a space-separated list of items contains one given 'item'. 243 * Returns 1 if found, 0 otherwise. 244 */ 245static int 246has_list_item(const char* list, const char* item) 247{ 248 const char* p = list; 249 int itemlen = strlen(item); 250 251 if (list == NULL) 252 return 0; 253 254 while (*p) { 255 const char* q; 256 257 /* skip spaces */ 258 while (*p == ' ' || *p == '\t') 259 p++; 260 261 /* find end of current list item */ 262 q = p; 263 while (*q && *q != ' ' && *q != '\t') 264 q++; 265 266 if (itemlen == q-p && !memcmp(p, item, itemlen)) 267 return 1; 268 269 /* skip to next item */ 270 p = q; 271 } 272 return 0; 273} 274 275/* Parse a number starting from 'input', but not going further 276 * than 'limit'. Return the value into '*result'. 277 * 278 * NOTE: Does not skip over leading spaces, or deal with sign characters. 279 * NOTE: Ignores overflows. 280 * 281 * The function returns NULL in case of error (bad format), or the new 282 * position after the decimal number in case of success (which will always 283 * be <= 'limit'). 284 */ 285static const char* 286parse_number(const char* input, const char* limit, int base, int* result) 287{ 288 const char* p = input; 289 int val = 0; 290 while (p < limit) { 291 int d = (*p - '0'); 292 if ((unsigned)d >= 10U) { 293 d = (*p - 'a'); 294 if ((unsigned)d >= 6U) 295 d = (*p - 'A'); 296 if ((unsigned)d >= 6U) 297 break; 298 d += 10; 299 } 300 if (d >= base) 301 break; 302 val = val*base + d; 303 p++; 304 } 305 if (p == input) 306 return NULL; 307 308 *result = val; 309 return p; 310} 311 312static const char* 313parse_decimal(const char* input, const char* limit, int* result) 314{ 315 return parse_number(input, limit, 10, result); 316} 317 318static const char* 319parse_hexadecimal(const char* input, const char* limit, int* result) 320{ 321 return parse_number(input, limit, 16, result); 322} 323 324/* This small data type is used to represent a CPU list / mask, as read 325 * from sysfs on Linux. See http://www.kernel.org/doc/Documentation/cputopology.txt 326 * 327 * For now, we don't expect more than 32 cores on mobile devices, so keep 328 * everything simple. 329 */ 330typedef struct { 331 uint32_t mask; 332} CpuList; 333 334static __inline__ void 335cpulist_init(CpuList* list) { 336 list->mask = 0; 337} 338 339static __inline__ void 340cpulist_and(CpuList* list1, CpuList* list2) { 341 list1->mask &= list2->mask; 342} 343 344static __inline__ void 345cpulist_set(CpuList* list, int index) { 346 if ((unsigned)index < 32) { 347 list->mask |= (uint32_t)(1U << index); 348 } 349} 350 351static __inline__ int 352cpulist_count(CpuList* list) { 353 return __builtin_popcount(list->mask); 354} 355 356/* Parse a textual list of cpus and store the result inside a CpuList object. 357 * Input format is the following: 358 * - comma-separated list of items (no spaces) 359 * - each item is either a single decimal number (cpu index), or a range made 360 * of two numbers separated by a single dash (-). Ranges are inclusive. 361 * 362 * Examples: 0 363 * 2,4-127,128-143 364 * 0-1 365 */ 366static void 367cpulist_parse(CpuList* list, const char* line, int line_len) 368{ 369 const char* p = line; 370 const char* end = p + line_len; 371 const char* q; 372 373 /* NOTE: the input line coming from sysfs typically contains a 374 * trailing newline, so take care of it in the code below 375 */ 376 while (p < end && *p != '\n') 377 { 378 int val, start_value, end_value; 379 380 /* Find the end of current item, and put it into 'q' */ 381 q = memchr(p, ',', end-p); 382 if (q == NULL) { 383 q = end; 384 } 385 386 /* Get first value */ 387 p = parse_decimal(p, q, &start_value); 388 if (p == NULL) 389 goto BAD_FORMAT; 390 391 end_value = start_value; 392 393 /* If we're not at the end of the item, expect a dash and 394 * and integer; extract end value. 395 */ 396 if (p < q && *p == '-') { 397 p = parse_decimal(p+1, q, &end_value); 398 if (p == NULL) 399 goto BAD_FORMAT; 400 } 401 402 /* Set bits CPU list bits */ 403 for (val = start_value; val <= end_value; val++) { 404 cpulist_set(list, val); 405 } 406 407 /* Jump to next item */ 408 p = q; 409 if (p < end) 410 p++; 411 } 412 413BAD_FORMAT: 414 ; 415} 416 417/* Read a CPU list from one sysfs file */ 418static void 419cpulist_read_from(CpuList* list, const char* filename) 420{ 421 char file[64]; 422 int filelen; 423 424 cpulist_init(list); 425 426 filelen = read_file(filename, file, sizeof file); 427 if (filelen < 0) { 428 D("Could not read %s: %s\n", filename, strerror(errno)); 429 return; 430 } 431 432 cpulist_parse(list, file, filelen); 433} 434 435// See <asm/hwcap.h> kernel header. 436#define HWCAP_VFP (1 << 6) 437#define HWCAP_IWMMXT (1 << 9) 438#define HWCAP_NEON (1 << 12) 439#define HWCAP_VFPv3 (1 << 13) 440#define HWCAP_VFPv3D16 (1 << 14) 441#define HWCAP_VFPv4 (1 << 16) 442#define HWCAP_IDIVA (1 << 17) 443#define HWCAP_IDIVT (1 << 18) 444 445#define AT_HWCAP 16 446 447#if defined(__arm__) 448/* Compute the ELF HWCAP flags. 449 */ 450static uint32_t 451get_elf_hwcap(const char* cpuinfo, int cpuinfo_len) 452{ 453 /* IMPORTANT: 454 * Accessing /proc/self/auxv doesn't work anymore on all 455 * platform versions. More specifically, when running inside 456 * a regular application process, most of /proc/self/ will be 457 * non-readable, including /proc/self/auxv. This doesn't 458 * happen however if the application is debuggable, or when 459 * running under the "shell" UID, which is why this was not 460 * detected appropriately. 461 */ 462#if 0 463 uint32_t result = 0; 464 const char filepath[] = "/proc/self/auxv"; 465 int fd = open(filepath, O_RDONLY); 466 if (fd < 0) { 467 D("Could not open %s: %s\n", filepath, strerror(errno)); 468 return 0; 469 } 470 471 struct { uint32_t tag; uint32_t value; } entry; 472 473 for (;;) { 474 int ret = read(fd, (char*)&entry, sizeof entry); 475 if (ret < 0) { 476 if (errno == EINTR) 477 continue; 478 D("Error while reading %s: %s\n", filepath, strerror(errno)); 479 break; 480 } 481 // Detect end of list. 482 if (ret == 0 || (entry.tag == 0 && entry.value == 0)) 483 break; 484 if (entry.tag == AT_HWCAP) { 485 result = entry.value; 486 break; 487 } 488 } 489 close(fd); 490 return result; 491#else 492 // Recreate ELF hwcaps by parsing /proc/cpuinfo Features tag. 493 uint32_t hwcaps = 0; 494 495 char* cpuFeatures = extract_cpuinfo_field(cpuinfo, cpuinfo_len, "Features"); 496 497 if (cpuFeatures != NULL) { 498 D("Found cpuFeatures = '%s'\n", cpuFeatures); 499 500 if (has_list_item(cpuFeatures, "vfp")) 501 hwcaps |= HWCAP_VFP; 502 if (has_list_item(cpuFeatures, "vfpv3")) 503 hwcaps |= HWCAP_VFPv3; 504 if (has_list_item(cpuFeatures, "vfpv3d16")) 505 hwcaps |= HWCAP_VFPv3D16; 506 if (has_list_item(cpuFeatures, "vfpv4")) 507 hwcaps |= HWCAP_VFPv4; 508 if (has_list_item(cpuFeatures, "neon")) 509 hwcaps |= HWCAP_NEON; 510 if (has_list_item(cpuFeatures, "idiva")) 511 hwcaps |= HWCAP_IDIVA; 512 if (has_list_item(cpuFeatures, "idivt")) 513 hwcaps |= HWCAP_IDIVT; 514 if (has_list_item(cpuFeatures, "idiv")) 515 hwcaps |= HWCAP_IDIVA | HWCAP_IDIVT; 516 if (has_list_item(cpuFeatures, "iwmmxt")) 517 hwcaps |= HWCAP_IWMMXT; 518 519 free(cpuFeatures); 520 } 521 return hwcaps; 522#endif 523} 524#endif /* __arm__ */ 525 526/* Return the number of cpus present on a given device. 527 * 528 * To handle all weird kernel configurations, we need to compute the 529 * intersection of the 'present' and 'possible' CPU lists and count 530 * the result. 531 */ 532static int 533get_cpu_count(void) 534{ 535 CpuList cpus_present[1]; 536 CpuList cpus_possible[1]; 537 538 cpulist_read_from(cpus_present, "/sys/devices/system/cpu/present"); 539 cpulist_read_from(cpus_possible, "/sys/devices/system/cpu/possible"); 540 541 /* Compute the intersection of both sets to get the actual number of 542 * CPU cores that can be used on this device by the kernel. 543 */ 544 cpulist_and(cpus_present, cpus_possible); 545 546 return cpulist_count(cpus_present); 547} 548 549static void 550android_cpuInitFamily(void) 551{ 552#if defined(__ARM_ARCH__) 553 g_cpuFamily = ANDROID_CPU_FAMILY_ARM; 554#elif defined(__i386__) 555 g_cpuFamily = ANDROID_CPU_FAMILY_X86; 556#elif defined(_MIPS_ARCH) 557 g_cpuFamily = ANDROID_CPU_FAMILY_MIPS; 558#else 559 g_cpuFamily = ANDROID_CPU_FAMILY_UNKNOWN; 560#endif 561} 562 563static void 564android_cpuInit(void) 565{ 566 char* cpuinfo = NULL; 567 int cpuinfo_len; 568 569 android_cpuInitFamily(); 570 571 g_cpuFeatures = 0; 572 g_cpuCount = 1; 573 g_inited = 1; 574 575 cpuinfo_len = get_file_size("/proc/cpuinfo"); 576 if (cpuinfo_len < 0) { 577 D("cpuinfo_len cannot be computed!"); 578 return; 579 } 580 cpuinfo = malloc(cpuinfo_len); 581 if (cpuinfo == NULL) { 582 D("cpuinfo buffer could not be allocated"); 583 return; 584 } 585 cpuinfo_len = read_file("/proc/cpuinfo", cpuinfo, cpuinfo_len); 586 D("cpuinfo_len is (%d):\n%.*s\n", cpuinfo_len, 587 cpuinfo_len >= 0 ? cpuinfo_len : 0, cpuinfo); 588 589 if (cpuinfo_len < 0) /* should not happen */ { 590 free(cpuinfo); 591 return; 592 } 593 594 /* Count the CPU cores, the value may be 0 for single-core CPUs */ 595 g_cpuCount = get_cpu_count(); 596 if (g_cpuCount == 0) { 597 g_cpuCount = 1; 598 } 599 600 D("found cpuCount = %d\n", g_cpuCount); 601 602#ifdef __ARM_ARCH__ 603 { 604 char* features = NULL; 605 char* architecture = NULL; 606 607 /* Extract architecture from the "CPU Architecture" field. 608 * The list is well-known, unlike the the output of 609 * the 'Processor' field which can vary greatly. 610 * 611 * See the definition of the 'proc_arch' array in 612 * $KERNEL/arch/arm/kernel/setup.c and the 'c_show' function in 613 * same file. 614 */ 615 char* cpuArch = extract_cpuinfo_field(cpuinfo, cpuinfo_len, "CPU architecture"); 616 617 if (cpuArch != NULL) { 618 char* end; 619 long archNumber; 620 int hasARMv7 = 0; 621 622 D("found cpuArch = '%s'\n", cpuArch); 623 624 /* read the initial decimal number, ignore the rest */ 625 archNumber = strtol(cpuArch, &end, 10); 626 627 /* Here we assume that ARMv8 will be upwards compatible with v7 628 * in the future. Unfortunately, there is no 'Features' field to 629 * indicate that Thumb-2 is supported. 630 */ 631 if (end > cpuArch && archNumber >= 7) { 632 hasARMv7 = 1; 633 } 634 635 /* Unfortunately, it seems that certain ARMv6-based CPUs 636 * report an incorrect architecture number of 7! 637 * 638 * See http://code.google.com/p/android/issues/detail?id=10812 639 * 640 * We try to correct this by looking at the 'elf_format' 641 * field reported by the 'Processor' field, which is of the 642 * form of "(v7l)" for an ARMv7-based CPU, and "(v6l)" for 643 * an ARMv6-one. 644 */ 645 if (hasARMv7) { 646 char* cpuProc = extract_cpuinfo_field(cpuinfo, cpuinfo_len, 647 "Processor"); 648 if (cpuProc != NULL) { 649 D("found cpuProc = '%s'\n", cpuProc); 650 if (has_list_item(cpuProc, "(v6l)")) { 651 D("CPU processor and architecture mismatch!!\n"); 652 hasARMv7 = 0; 653 } 654 free(cpuProc); 655 } 656 } 657 658 if (hasARMv7) { 659 g_cpuFeatures |= ANDROID_CPU_ARM_FEATURE_ARMv7; 660 } 661 662 /* The LDREX / STREX instructions are available from ARMv6 */ 663 if (archNumber >= 6) { 664 g_cpuFeatures |= ANDROID_CPU_ARM_FEATURE_LDREX_STREX; 665 } 666 667 free(cpuArch); 668 } 669 670 /* Extract the list of CPU features from ELF hwcaps */ 671 uint32_t hwcaps = get_elf_hwcap(cpuinfo, cpuinfo_len); 672 673 if (hwcaps != 0) { 674 int has_vfp = (hwcaps & HWCAP_VFP); 675 int has_vfpv3 = (hwcaps & HWCAP_VFPv3); 676 int has_vfpv3d16 = (hwcaps & HWCAP_VFPv3D16); 677 int has_vfpv4 = (hwcaps & HWCAP_VFPv4); 678 int has_neon = (hwcaps & HWCAP_NEON); 679 int has_idiva = (hwcaps & HWCAP_IDIVA); 680 int has_idivt = (hwcaps & HWCAP_IDIVT); 681 int has_iwmmxt = (hwcaps & HWCAP_IWMMXT); 682 683 // The kernel does a poor job at ensuring consistency when 684 // describing CPU features. So lots of guessing is needed. 685 686 // 'vfpv4' implies VFPv3|VFP_FMA|FP16 687 if (has_vfpv4) 688 g_cpuFeatures |= ANDROID_CPU_ARM_FEATURE_VFPv3 | 689 ANDROID_CPU_ARM_FEATURE_VFP_FP16 | 690 ANDROID_CPU_ARM_FEATURE_VFP_FMA; 691 692 // 'vfpv3' or 'vfpv3d16' imply VFPv3. Note that unlike GCC, 693 // a value of 'vfpv3' doesn't necessarily mean that the D32 694 // feature is present, so be conservative. All CPUs in the 695 // field that support D32 also support NEON, so this should 696 // not be a problem in practice. 697 if (has_vfpv3 || has_vfpv3d16) 698 g_cpuFeatures |= ANDROID_CPU_ARM_FEATURE_VFPv3; 699 700 // 'vfp' is super ambiguous. Depending on the kernel, it can 701 // either mean VFPv2 or VFPv3. Make it depend on ARMv7. 702 if (has_vfp) { 703 if (g_cpuFeatures & ANDROID_CPU_ARM_FEATURE_ARMv7) 704 g_cpuFeatures |= ANDROID_CPU_ARM_FEATURE_VFPv3; 705 else 706 g_cpuFeatures |= ANDROID_CPU_ARM_FEATURE_VFPv2; 707 } 708 709 // Neon implies VFPv3|D32, and if vfpv4 is detected, NEON_FMA 710 if (has_neon) { 711 g_cpuFeatures |= ANDROID_CPU_ARM_FEATURE_VFPv3 | 712 ANDROID_CPU_ARM_FEATURE_NEON | 713 ANDROID_CPU_ARM_FEATURE_VFP_D32; 714 if (has_vfpv4) 715 g_cpuFeatures |= ANDROID_CPU_ARM_FEATURE_NEON_FMA; 716 } 717 718 // VFPv3 implies VFPv2 and ARMv7 719 if (g_cpuFeatures & ANDROID_CPU_ARM_FEATURE_VFPv3) 720 g_cpuFeatures |= ANDROID_CPU_ARM_FEATURE_VFPv2 | 721 ANDROID_CPU_ARM_FEATURE_ARMv7; 722 723 // Note that some buggy kernels do not report these even when 724 // the CPU actually support the division instructions. However, 725 // assume that if 'vfpv4' is detected, then the CPU supports 726 // sdiv/udiv properly. 727 if (has_idiva || has_vfpv4) 728 g_cpuFeatures |= ANDROID_CPU_ARM_FEATURE_IDIV_ARM; 729 if (has_idivt || has_vfpv4) 730 g_cpuFeatures |= ANDROID_CPU_ARM_FEATURE_IDIV_THUMB2; 731 732 if (has_iwmmxt) 733 g_cpuFeatures |= ANDROID_CPU_ARM_FEATURE_iWMMXt; 734 } 735 736 /* Extract the cpuid value from various fields */ 737 // The CPUID value is broken up in several entries in /proc/cpuinfo. 738 // This table is used to rebuild it from the entries. 739 const struct CpuIdEntry { 740 const char* field; 741 char format; 742 char bit_lshift; 743 char bit_length; 744 } cpu_id_entries[] = { 745 { "CPU implementer", 'x', 24, 8 }, 746 { "CPU variant", 'x', 20, 4 }, 747 { "CPU part", 'x', 4, 12 }, 748 { "CPU revision", 'd', 0, 4 }, 749 }; 750 size_t i; 751 for (i = 0; 752 i < sizeof(cpu_id_entries)/sizeof(cpu_id_entries[0]); 753 ++i) { 754 const struct CpuIdEntry* entry = &cpu_id_entries[i]; 755 char* value = extract_cpuinfo_field(cpuinfo, 756 cpuinfo_len, 757 entry->field); 758 if (value == NULL) 759 continue; 760 761 printf("field=%s value='%s'\n", entry->field, value); 762 char* value_end = value + strlen(value); 763 int val = 0; 764 const char* start = value; 765 const char* p; 766 if (value[0] == '0' && (value[1] == 'x' || value[1] == 'X')) { 767 start += 2; 768 p = parse_hexadecimal(start, value_end, &val); 769 } else if (entry->format == 'x') 770 p = parse_hexadecimal(value, value_end, &val); 771 else 772 p = parse_decimal(value, value_end, &val); 773 774 if (p > (const char*)start) { 775 val &= ((1 << entry->bit_length)-1); 776 val <<= entry->bit_lshift; 777 g_cpuIdArm |= (uint32_t) val; 778 } 779 780 free(value); 781 } 782 } 783#endif /* __ARM_ARCH__ */ 784 785#ifdef __i386__ 786 int regs[4]; 787 788/* According to http://en.wikipedia.org/wiki/CPUID */ 789#define VENDOR_INTEL_b 0x756e6547 790#define VENDOR_INTEL_c 0x6c65746e 791#define VENDOR_INTEL_d 0x49656e69 792 793 x86_cpuid(0, regs); 794 int vendorIsIntel = (regs[1] == VENDOR_INTEL_b && 795 regs[2] == VENDOR_INTEL_c && 796 regs[3] == VENDOR_INTEL_d); 797 798 x86_cpuid(1, regs); 799 if ((regs[2] & (1 << 9)) != 0) { 800 g_cpuFeatures |= ANDROID_CPU_X86_FEATURE_SSSE3; 801 } 802 if ((regs[2] & (1 << 23)) != 0) { 803 g_cpuFeatures |= ANDROID_CPU_X86_FEATURE_POPCNT; 804 } 805 if (vendorIsIntel && (regs[2] & (1 << 22)) != 0) { 806 g_cpuFeatures |= ANDROID_CPU_X86_FEATURE_MOVBE; 807 } 808#endif 809 810 free(cpuinfo); 811} 812 813 814AndroidCpuFamily 815android_getCpuFamily(void) 816{ 817 pthread_once(&g_once, android_cpuInit); 818 return g_cpuFamily; 819} 820 821 822uint64_t 823android_getCpuFeatures(void) 824{ 825 pthread_once(&g_once, android_cpuInit); 826 return g_cpuFeatures; 827} 828 829 830int 831android_getCpuCount(void) 832{ 833 pthread_once(&g_once, android_cpuInit); 834 return g_cpuCount; 835} 836 837static void 838android_cpuInitDummy(void) 839{ 840 g_inited = 1; 841} 842 843int 844android_setCpu(int cpu_count, uint64_t cpu_features) 845{ 846 /* Fail if the library was already initialized. */ 847 if (g_inited) 848 return 0; 849 850 android_cpuInitFamily(); 851 g_cpuCount = (cpu_count <= 0 ? 1 : cpu_count); 852 g_cpuFeatures = cpu_features; 853 pthread_once(&g_once, android_cpuInitDummy); 854 855 return 1; 856} 857 858#ifdef __arm__ 859uint32_t 860android_getCpuIdArm(void) 861{ 862 pthread_once(&g_once, android_cpuInit); 863 return g_cpuIdArm; 864} 865 866int 867android_setCpuArm(int cpu_count, uint64_t cpu_features, uint32_t cpu_id) 868{ 869 if (!android_setCpu(cpu_count, cpu_features)) 870 return 0; 871 872 g_cpuIdArm = cpu_id; 873 return 1; 874} 875#endif /* __arm__ */ 876 877/* 878 * Technical note: Making sense of ARM's FPU architecture versions. 879 * 880 * FPA was ARM's first attempt at an FPU architecture. There is no Android 881 * device that actually uses it since this technology was already obsolete 882 * when the project started. If you see references to FPA instructions 883 * somewhere, you can be sure that this doesn't apply to Android at all. 884 * 885 * FPA was followed by "VFP", soon renamed "VFPv1" due to the emergence of 886 * new versions / additions to it. ARM considers this obsolete right now, 887 * and no known Android device implements it either. 888 * 889 * VFPv2 added a few instructions to VFPv1, and is an *optional* extension 890 * supported by some ARMv5TE, ARMv6 and ARMv6T2 CPUs. Note that a device 891 * supporting the 'armeabi' ABI doesn't necessarily support these. 892 * 893 * VFPv3-D16 adds a few instructions on top of VFPv2 and is typically used 894 * on ARMv7-A CPUs which implement a FPU. Note that it is also mandated 895 * by the Android 'armeabi-v7a' ABI. The -D16 suffix in its name means 896 * that it provides 16 double-precision FPU registers (d0-d15) and 32 897 * single-precision ones (s0-s31) which happen to be mapped to the same 898 * register banks. 899 * 900 * VFPv3-D32 is the name of an extension to VFPv3-D16 that provides 16 901 * additional double precision registers (d16-d31). Note that there are 902 * still only 32 single precision registers. 903 * 904 * VFPv3xD is a *subset* of VFPv3-D16 that only provides single-precision 905 * registers. It is only used on ARMv7-M (i.e. on micro-controllers) which 906 * are not supported by Android. Note that it is not compatible with VFPv2. 907 * 908 * NOTE: The term 'VFPv3' usually designate either VFPv3-D16 or VFPv3-D32 909 * depending on context. For example GCC uses it for VFPv3-D32, but 910 * the Linux kernel code uses it for VFPv3-D16 (especially in 911 * /proc/cpuinfo). Always try to use the full designation when 912 * possible. 913 * 914 * NEON, a.k.a. "ARM Advanced SIMD" is an extension that provides 915 * instructions to perform parallel computations on vectors of 8, 16, 916 * 32, 64 and 128 bit quantities. NEON requires VFPv32-D32 since all 917 * NEON registers are also mapped to the same register banks. 918 * 919 * VFPv4-D16, adds a few instructions on top of VFPv3-D16 in order to 920 * perform fused multiply-accumulate on VFP registers, as well as 921 * half-precision (16-bit) conversion operations. 922 * 923 * VFPv4-D32 is VFPv4-D16 with 32, instead of 16, FPU double precision 924 * registers. 925 * 926 * VPFv4-NEON is VFPv4-D32 with NEON instructions. It also adds fused 927 * multiply-accumulate instructions that work on the NEON registers. 928 * 929 * NOTE: Similarly, "VFPv4" might either reference VFPv4-D16 or VFPv4-D32 930 * depending on context. 931 * 932 * The following information was determined by scanning the binutils-2.22 933 * sources: 934 * 935 * Basic VFP instruction subsets: 936 * 937 * #define FPU_VFP_EXT_V1xD 0x08000000 // Base VFP instruction set. 938 * #define FPU_VFP_EXT_V1 0x04000000 // Double-precision insns. 939 * #define FPU_VFP_EXT_V2 0x02000000 // ARM10E VFPr1. 940 * #define FPU_VFP_EXT_V3xD 0x01000000 // VFPv3 single-precision. 941 * #define FPU_VFP_EXT_V3 0x00800000 // VFPv3 double-precision. 942 * #define FPU_NEON_EXT_V1 0x00400000 // Neon (SIMD) insns. 943 * #define FPU_VFP_EXT_D32 0x00200000 // Registers D16-D31. 944 * #define FPU_VFP_EXT_FP16 0x00100000 // Half-precision extensions. 945 * #define FPU_NEON_EXT_FMA 0x00080000 // Neon fused multiply-add 946 * #define FPU_VFP_EXT_FMA 0x00040000 // VFP fused multiply-add 947 * 948 * FPU types (excluding NEON) 949 * 950 * FPU_VFP_V1xD (EXT_V1xD) 951 * | 952 * +--------------------------+ 953 * | | 954 * FPU_VFP_V1 (+EXT_V1) FPU_VFP_V3xD (+EXT_V2+EXT_V3xD) 955 * | | 956 * | | 957 * FPU_VFP_V2 (+EXT_V2) FPU_VFP_V4_SP_D16 (+EXT_FP16+EXT_FMA) 958 * | 959 * FPU_VFP_V3D16 (+EXT_Vx3D+EXT_V3) 960 * | 961 * +--------------------------+ 962 * | | 963 * FPU_VFP_V3 (+EXT_D32) FPU_VFP_V4D16 (+EXT_FP16+EXT_FMA) 964 * | | 965 * | FPU_VFP_V4 (+EXT_D32) 966 * | 967 * FPU_VFP_HARD (+EXT_FMA+NEON_EXT_FMA) 968 * 969 * VFP architectures: 970 * 971 * ARCH_VFP_V1xD (EXT_V1xD) 972 * | 973 * +------------------+ 974 * | | 975 * | ARCH_VFP_V3xD (+EXT_V2+EXT_V3xD) 976 * | | 977 * | ARCH_VFP_V3xD_FP16 (+EXT_FP16) 978 * | | 979 * | ARCH_VFP_V4_SP_D16 (+EXT_FMA) 980 * | 981 * ARCH_VFP_V1 (+EXT_V1) 982 * | 983 * ARCH_VFP_V2 (+EXT_V2) 984 * | 985 * ARCH_VFP_V3D16 (+EXT_V3xD+EXT_V3) 986 * | 987 * +-------------------+ 988 * | | 989 * | ARCH_VFP_V3D16_FP16 (+EXT_FP16) 990 * | 991 * +-------------------+ 992 * | | 993 * | ARCH_VFP_V4_D16 (+EXT_FP16+EXT_FMA) 994 * | | 995 * | ARCH_VFP_V4 (+EXT_D32) 996 * | | 997 * | ARCH_NEON_VFP_V4 (+EXT_NEON+EXT_NEON_FMA) 998 * | 999 * ARCH_VFP_V3 (+EXT_D32) 1000 * | 1001 * +-------------------+ 1002 * | | 1003 * | ARCH_VFP_V3_FP16 (+EXT_FP16) 1004 * | 1005 * ARCH_VFP_V3_PLUS_NEON_V1 (+EXT_NEON) 1006 * | 1007 * ARCH_NEON_FP16 (+EXT_FP16) 1008 * 1009 * -fpu=<name> values and their correspondance with FPU architectures above: 1010 * 1011 * {"vfp", FPU_ARCH_VFP_V2}, 1012 * {"vfp9", FPU_ARCH_VFP_V2}, 1013 * {"vfp3", FPU_ARCH_VFP_V3}, // For backwards compatbility. 1014 * {"vfp10", FPU_ARCH_VFP_V2}, 1015 * {"vfp10-r0", FPU_ARCH_VFP_V1}, 1016 * {"vfpxd", FPU_ARCH_VFP_V1xD}, 1017 * {"vfpv2", FPU_ARCH_VFP_V2}, 1018 * {"vfpv3", FPU_ARCH_VFP_V3}, 1019 * {"vfpv3-fp16", FPU_ARCH_VFP_V3_FP16}, 1020 * {"vfpv3-d16", FPU_ARCH_VFP_V3D16}, 1021 * {"vfpv3-d16-fp16", FPU_ARCH_VFP_V3D16_FP16}, 1022 * {"vfpv3xd", FPU_ARCH_VFP_V3xD}, 1023 * {"vfpv3xd-fp16", FPU_ARCH_VFP_V3xD_FP16}, 1024 * {"neon", FPU_ARCH_VFP_V3_PLUS_NEON_V1}, 1025 * {"neon-fp16", FPU_ARCH_NEON_FP16}, 1026 * {"vfpv4", FPU_ARCH_VFP_V4}, 1027 * {"vfpv4-d16", FPU_ARCH_VFP_V4D16}, 1028 * {"fpv4-sp-d16", FPU_ARCH_VFP_V4_SP_D16}, 1029 * {"neon-vfpv4", FPU_ARCH_NEON_VFP_V4}, 1030 * 1031 * 1032 * Simplified diagram that only includes FPUs supported by Android: 1033 * Only ARCH_VFP_V3D16 is actually mandated by the armeabi-v7a ABI, 1034 * all others are optional and must be probed at runtime. 1035 * 1036 * ARCH_VFP_V3D16 (EXT_V1xD+EXT_V1+EXT_V2+EXT_V3xD+EXT_V3) 1037 * | 1038 * +-------------------+ 1039 * | | 1040 * | ARCH_VFP_V3D16_FP16 (+EXT_FP16) 1041 * | 1042 * +-------------------+ 1043 * | | 1044 * | ARCH_VFP_V4_D16 (+EXT_FP16+EXT_FMA) 1045 * | | 1046 * | ARCH_VFP_V4 (+EXT_D32) 1047 * | | 1048 * | ARCH_NEON_VFP_V4 (+EXT_NEON+EXT_NEON_FMA) 1049 * | 1050 * ARCH_VFP_V3 (+EXT_D32) 1051 * | 1052 * +-------------------+ 1053 * | | 1054 * | ARCH_VFP_V3_FP16 (+EXT_FP16) 1055 * | 1056 * ARCH_VFP_V3_PLUS_NEON_V1 (+EXT_NEON) 1057 * | 1058 * ARCH_NEON_FP16 (+EXT_FP16) 1059 * 1060 */ 1061