15821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Copyright (c) 2005, Google Inc.
25821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// All rights reserved.
35821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//
45821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Redistribution and use in source and binary forms, with or without
55821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// modification, are permitted provided that the following conditions are
65821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// met:
75821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//
85821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//     * Redistributions of source code must retain the above copyright
95821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// notice, this list of conditions and the following disclaimer.
105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//     * Redistributions in binary form must reproduce the above
115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// copyright notice, this list of conditions and the following disclaimer
125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// in the documentation and/or other materials provided with the
135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// distribution.
145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//     * Neither the name of Google Inc. nor the names of its
155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// contributors may be used to endorse or promote products derived from
165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// this software without specific prior written permission.
175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//
185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// ---
315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Author: Craig Silverstein
325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//
335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// This is an internal header file used by profiler.cc.  It defines
345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// the single (inline) function GetPC.  GetPC is used in a signal
355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// handler to figure out the instruction that was being executed when
365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// the signal-handler was triggered.
375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//
385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// To get this, we use the ucontext_t argument to the signal-handler
395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// callback, which holds the full context of what was going on when
405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// the signal triggered.  How to get from a ucontext_t to a Program
415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Counter is OS-dependent.
425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#ifndef BASE_GETPC_H_
445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#define BASE_GETPC_H_
455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "config.h"
475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// On many linux systems, we may need _GNU_SOURCE to get access to
495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// the defined constants that define the register we want to see (eg
505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// REG_EIP).  Note this #define must come first!
515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#define _GNU_SOURCE 1
525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// If #define _GNU_SOURCE causes problems, this might work instead.
// It will cause problems for FreeBSD, though, because it turns off
// the needed __BSD_VISIBLE.
555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//#define _XOPEN_SOURCE 500
565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include <string.h>         // for memcmp
585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#if defined(HAVE_SYS_UCONTEXT_H)
595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include <sys/ucontext.h>
605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#elif defined(HAVE_UCONTEXT_H)
615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include <ucontext.h>       // for ucontext_t (and also mcontext_t)
625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#elif defined(HAVE_CYGWIN_SIGNAL_H)
635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include <cygwin/signal.h>
645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)typedef ucontext ucontext_t;
655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#endif
665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Take the example where function Foo() calls function Bar().  For
695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// many architectures, Bar() is responsible for setting up and tearing
705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// down its own stack frame.  In that case, it's possible for the
715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// interrupt to happen when execution is in Bar(), but the stack frame
725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// is not properly set up (either before it's done being set up, or
735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// after it's been torn down but before Bar() returns).  In those
745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// cases, the stack trace cannot see the caller function anymore.
755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//
765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// GetPC can try to identify this situation, on architectures where it
775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// might occur, and unwind the current function call in that case to
785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// avoid false edges in the profile graph (that is, edges that appear
795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// to show a call skipping over a function).  To do this, we hard-code
805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// in the asm instructions we might see when setting up or tearing
815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// down a stack frame.
825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//
835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// This is difficult to get right: the instructions depend on the
845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// processor, the compiler ABI, and even the optimization level.  This
855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// is a best effort patch -- if we fail to detect such a situation, or
865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// mess up the PC, nothing happens; the returned PC is not used for
875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// any further processing.
struct CallUnrollInfo {
  // One hard-coded instruction pattern, together with the location of
  // the caller's return address on the stack when that pattern matches.

  // Offset from the (e)ip register, in bytes, at which this instruction
  // sequence should be matched.  Offset 0 is the next instruction to
  // execute.  Be extra careful with negative offsets on architectures
  // with variable instruction length (like x86): stepping back one
  // instruction is not as easy as subtracting a fixed offset!
  int pc_offset;
  // The actual instruction bytes.  These are unsigned because opcode
  // bytes such as 0x89 or 0xc3 do not fit in a signed char, and
  // brace-initializing a plain char with them is a narrowing conversion
  // (ill-formed in C++11).  Feel free to make the array larger if you
  // need a longer sequence.
  unsigned char ins[16];
  // How many bytes of the ins array to match.
  int ins_size;
  // Offset from the stack pointer (e)sp, in bytes, at which to look for
  // the call's return address.
  int return_sp_offset;
};
1045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// The dereferences needed to get the PC from a struct ucontext were
1075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// determined at configure time, and stored in the macro
1085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// PC_FROM_UCONTEXT in config.h.  The only thing we need to do here,
1095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// then, is to do the magic call-unrolling for systems that support it.
1105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// -- Special case 1: linux x86, for which we have CallUnrollInfo
1125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#if defined(__linux) && defined(__i386) && defined(__GNUC__)
1135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static const CallUnrollInfo callunrollinfo[] = {
1145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Entry to a function:  push %ebp;  mov  %esp,%ebp
1155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Top-of-stack contains the caller IP.
1165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  { 0,
1175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    {0x55, 0x89, 0xe5}, 3,
1185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    0
1195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  },
1205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Entry to a function, second instruction:  push %ebp;  mov  %esp,%ebp
1215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Top-of-stack contains the old frame, caller IP is +4.
1225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  { -1,
1235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    {0x55, 0x89, 0xe5}, 3,
1245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    4
1255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  },
1265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Return from a function: RET.
1275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Top-of-stack contains the caller IP.
1285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  { 0,
1295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    {0xc3}, 1,
1305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    0
1315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
1325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)};
1335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)inline void* GetPC(const ucontext_t& signal_ucontext) {
1355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // See comment above struct CallUnrollInfo.  Only try instruction
1365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // flow matching if both eip and esp looks reasonable.
1375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  const int eip = signal_ucontext.uc_mcontext.gregs[REG_EIP];
1385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  const int esp = signal_ucontext.uc_mcontext.gregs[REG_ESP];
1395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if ((eip & 0xffff0000) != 0 && (~eip & 0xffff0000) != 0 &&
1405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      (esp & 0xffff0000) != 0) {
1415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    char* eip_char = reinterpret_cast<char*>(eip);
1425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    for (int i = 0; i < sizeof(callunrollinfo)/sizeof(*callunrollinfo); ++i) {
1435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      if (!memcmp(eip_char + callunrollinfo[i].pc_offset,
1445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                  callunrollinfo[i].ins, callunrollinfo[i].ins_size)) {
1455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        // We have a match.
1465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        void **retaddr = (void**)(esp + callunrollinfo[i].return_sp_offset);
1475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        return *retaddr;
1485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      }
1495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
1505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
1515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  return (void*)eip;
1525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
1535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Special case #2: Windows, which has to do something totally different.
1555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#elif defined(_WIN32) || defined(__CYGWIN__) || defined(__CYGWIN32__) || defined(__MINGW32__)
1565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// If this is ever implemented, probably the way to do it is to have
1575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// profiler.cc use a high-precision timer via timeSetEvent:
1585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//    http://msdn2.microsoft.com/en-us/library/ms712713.aspx
1595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// We'd use it in mode TIME_CALLBACK_FUNCTION/TIME_PERIODIC.
1605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// The callback function would be something like prof_handler, but
1615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// alas the arguments are different: no ucontext_t!  I don't know
1625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// how we'd get the PC (using StackWalk64?)
1635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//    http://msdn2.microsoft.com/en-us/library/ms680650.aspx
1645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "base/logging.h"   // for RAW_LOG
1665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#ifndef HAVE_CYGWIN_SIGNAL_H
1675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)typedef int ucontext_t;
1685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#endif
1695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)inline void* GetPC(const struct ucontext_t& signal_ucontext) {
1715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  RAW_LOG(ERROR, "GetPC is not yet implemented on Windows\n");
1725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  return NULL;
1735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
1745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Normal cases.  If this doesn't compile, it's probably because
1765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// PC_FROM_UCONTEXT is the empty string.  You need to figure out
1775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// the right value for your system, and add it to the list in
1785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// configure.ac (or set it manually in your config.h).
1795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#else
1805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)inline void* GetPC(const ucontext_t& signal_ucontext) {
1815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  return (void*)signal_ucontext.PC_FROM_UCONTEXT;   // defined in config.h
1825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
1835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#endif
1855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#endif  // BASE_GETPC_H_
187