/* Copyright (c) 2007, Google Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met:
 *
 *     * Redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above
 * copyright notice, this list of conditions and the following disclaimer
 * in the documentation and/or other materials provided with the
 * distribution.
 *     * Neither the name of Google Inc. nor the names of its
 * contributors may be used to endorse or promote products derived from
 * this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * ---
 * This module gets enough CPU information to optimize the
 * atomicops module on x86.
 */
345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "base/atomicops.h"
365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "base/basictypes.h"
375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "base/googleinit.h"
385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "base/logging.h"
395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include <string.h>
405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
// This file only makes sense with atomicops-internals-x86.h -- it
// depends on structs that are defined in that file.  If atomicops.h
// doesn't sub-include that file, then we aren't needed, and shouldn't
// try to do anything.
455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#ifdef BASE_ATOMICOPS_INTERNALS_X86_H_
465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
// cpuid(out_a, out_b, out_c, out_d, leaf): executes the cpuid instruction
// with `leaf` loaded into eax and stores the resulting eax, ebx, ecx and edx
// into out_a, out_b, out_c and out_d respectively.
//
// In PIC compilations %ebx holds the address of the global offset table, so
// its value must survive the cpuid instruction.  The register is copied to
// %edi/%rdi first and swapped back afterwards; the swap also leaves cpuid's
// ebx result in %edi/%rdi, which is where the "=D" output operand reads it.
//
// The macro is only defined on i386 and x86-64, so `#if defined(cpuid)` can
// be used to detect whether it is available.
#if defined(__i386__)
#define ATOMICOPS_CPUID_SAVE_BX    "mov %%ebx, %%edi\n"
#define ATOMICOPS_CPUID_RESTORE_BX "xchg %%edi, %%ebx\n"
#elif defined (__x86_64__)
#define ATOMICOPS_CPUID_SAVE_BX    "mov %%rbx, %%rdi\n"
#define ATOMICOPS_CPUID_RESTORE_BX "xchg %%rdi, %%rbx\n"
#endif

#if defined(ATOMICOPS_CPUID_SAVE_BX)
#define cpuid(out_a, out_b, out_c, out_d, leaf)                    \
  __asm__ (ATOMICOPS_CPUID_SAVE_BX                                 \
           "cpuid\n"                                               \
           ATOMICOPS_CPUID_RESTORE_BX                              \
           : "=a" (out_a), "=D" (out_b), "=c" (out_c), "=d" (out_d) \
           : "a" (leaf))
#endif
635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#if defined(cpuid)        // initialize the struct only on x86
655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Set the flags so that code will run correctly and conservatively
675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// until InitGoogle() is called.
685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)struct AtomicOps_x86CPUFeatureStruct AtomicOps_Internalx86CPUFeatures = {
695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  false,          // bug can't exist before process spawns multiple threads
705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  false,          // no SSE2
715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  false,          // no cmpxchg16b
725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)};
735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Initialize the AtomicOps_Internalx86CPUFeatures struct.
755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static void AtomicOps_Internalx86CPUFeaturesInit() {
765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  uint32 eax;
775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  uint32 ebx;
785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  uint32 ecx;
795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  uint32 edx;
805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Get vendor string (issue CPUID with eax = 0)
825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  cpuid(eax, ebx, ecx, edx, 0);
835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  char vendor[13];
845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  memcpy(vendor, &ebx, 4);
855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  memcpy(vendor + 4, &edx, 4);
865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  memcpy(vendor + 8, &ecx, 4);
875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  vendor[12] = 0;
885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // get feature flags in ecx/edx, and family/model in eax
905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  cpuid(eax, ebx, ecx, edx, 1);
915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  int family = (eax >> 8) & 0xf;        // family and model fields
935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  int model = (eax >> 4) & 0xf;
945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (family == 0xf) {                  // use extended family and model fields
955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    family += (eax >> 20) & 0xff;
965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    model += ((eax >> 16) & 0xf) << 4;
975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Opteron Rev E has a bug in which on very rare occasions a locked
1005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // instruction doesn't act as a read-acquire barrier if followed by a
1015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // non-locked read-modify-write instruction.  Rev F has this bug in
1025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // pre-release versions, but not in versions released to customers,
1035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // so we test only for Rev E, which is family 15, model 32..63 inclusive.
1045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (strcmp(vendor, "AuthenticAMD") == 0 &&       // AMD
1055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      family == 15 &&
1065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      32 <= model && model <= 63) {
1075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    AtomicOps_Internalx86CPUFeatures.has_amd_lock_mb_bug = true;
1085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  } else {
1095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    AtomicOps_Internalx86CPUFeatures.has_amd_lock_mb_bug = false;
1105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
1115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // edx bit 26 is SSE2 which we use to tell use whether we can use mfence
1135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  AtomicOps_Internalx86CPUFeatures.has_sse2 = ((edx >> 26) & 1);
1145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // ecx bit 13 indicates whether the cmpxchg16b instruction is supported
1165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  AtomicOps_Internalx86CPUFeatures.has_cmpxchg16b = ((ecx >> 13) & 1);
1175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
1185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)REGISTER_MODULE_INITIALIZER(atomicops_x86, {
1205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  AtomicOps_Internalx86CPUFeaturesInit();
1215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)});
1225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#endif
1245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#endif  /* ifdef BASE_ATOMICOPS_INTERNALS_X86_H_ */
126