/*--------------------------------------------------------------------*/
/*--- Callgrind                                                    ---*/
/*---                                                       bbcc.c ---*/
/*--------------------------------------------------------------------*/

/*
   This file is part of Callgrind, a Valgrind tool for call tracing.

   Copyright (C) 2002-2015, Josef Weidendorfer (Josef.Weidendorfer@gmx.de)

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
   02111-1307, USA.

   The GNU General Public License is contained in the file COPYING.
*/

#include "global.h"
#include "costs.h"

#include "pub_tool_threadstate.h"

/*------------------------------------------------------------*/
/*--- BBCC operations                                      ---*/
/*------------------------------------------------------------*/

#define N_BBCC_INITIAL_ENTRIES  10437

/* BBCC table (key is BB/Context), per thread, resizable */
bbcc_hash current_bbccs;
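/* For orientation, a sketch of the bbcc_hash type used throughout this
 * file (the authoritative definition lives in global.h; only the fields
 * referenced here are shown, and the exact integer types may differ):
 *
 *   typedef struct {
 *      UInt   size, entries;  // bucket count / number of stored BBCCs
 *      BBCC** table;          // chained buckets, keyed by (BB, Context)
 *   } bbcc_hash;
 */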

void CLG_(init_bbcc_hash)(bbcc_hash* bbccs)
{
   Int i;

   CLG_ASSERT(bbccs != 0);

   bbccs->size    = N_BBCC_INITIAL_ENTRIES;
   bbccs->entries = 0;
   bbccs->table = (BBCC**) CLG_MALLOC("cl.bbcc.ibh.1",
                                      bbccs->size * sizeof(BBCC*));

   for (i = 0; i < bbccs->size; i++) bbccs->table[i] = NULL;
}

void CLG_(copy_current_bbcc_hash)(bbcc_hash* dst)
{
  CLG_ASSERT(dst != 0);

  dst->size    = current_bbccs.size;
  dst->entries = current_bbccs.entries;
  dst->table   = current_bbccs.table;
}

bbcc_hash* CLG_(get_current_bbcc_hash)()
{
  return &current_bbccs;
}

void CLG_(set_current_bbcc_hash)(bbcc_hash* h)
{
  CLG_ASSERT(h != 0);

  current_bbccs.size    = h->size;
  current_bbccs.entries = h->entries;
  current_bbccs.table   = h->table;
}
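/* A plausible use of the three accessors above (sketch only; the real
 * bookkeeping lives in the thread handling code): on a thread switch,
 * save the running table and install the one of the new thread:
 *
 *   CLG_(copy_current_bbcc_hash)( &old_thread_info->bbccs );
 *   CLG_(set_current_bbcc_hash) ( &new_thread_info->bbccs );
 *
 * Here old_thread_info/new_thread_info and the bbccs member are
 * hypothetical names for per-thread storage.
 */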

/*
 * Zero all costs of a BBCC
 */
void CLG_(zero_bbcc)(BBCC* bbcc)
{
  Int i;
  jCC* jcc;

  CLG_ASSERT(bbcc->cxt != 0);
  CLG_DEBUG(1, "  zero_bbcc: BB %#lx, Cxt %u "
            "(fn '%s', rec %u)\n",
            bb_addr(bbcc->bb),
            bbcc->cxt->base_number + bbcc->rec_index,
            bbcc->cxt->fn[0]->name,
            bbcc->rec_index);

  if ((bbcc->ecounter_sum == 0) &&
      (bbcc->ret_counter == 0)) return;

  for (i = 0; i < bbcc->bb->cost_count; i++)
    bbcc->cost[i] = 0;
  for (i = 0; i <= bbcc->bb->cjmp_count; i++) {
    bbcc->jmp[i].ecounter = 0;
    for (jcc = bbcc->jmp[i].jcc_list; jcc; jcc = jcc->next_from)
      CLG_(init_cost)( CLG_(sets).full, jcc->cost );
  }
  bbcc->ecounter_sum = 0;
  bbcc->ret_counter = 0;
}



void CLG_(forall_bbccs)(void (*func)(BBCC*))
{
  BBCC *bbcc, *bbcc2;
  int i, j;

  for (i = 0; i < current_bbccs.size; i++) {
    if ((bbcc = current_bbccs.table[i]) == NULL) continue;
    while (bbcc) {
      /* every bbcc should have a rec_array */
      CLG_ASSERT(bbcc->rec_array != 0);

      for (j = 0; j < bbcc->cxt->fn[0]->separate_recursions; j++) {
        if ((bbcc2 = bbcc->rec_array[j]) == 0) continue;

        (*func)(bbcc2);
      }
      bbcc = bbcc->next;
    }
  }
}
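/* Usage sketch (illustrative, not part of the build): a dump pass can
 * reset all collected costs by combining the two helpers above, since
 * CLG_(zero_bbcc) matches the expected callback signature:
 *
 *   CLG_(forall_bbccs)( CLG_(zero_bbcc) );
 */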


/* All BBCCs for recursion level 0 are inserted into a
 * thread-specific hash table with key
 * - address of BB structure (unique, as never freed)
 * - current context (includes caller chain)
 * BBCCs for other recursion levels are in bbcc->rec_array.
 *
 * The hash is used in setup_bbcc(), i.e. to find the cost
 * counters to be changed during the execution of a BB.
 */

static __inline__
UInt bbcc_hash_idx(BB* bb, Context* cxt, UInt size)
{
   CLG_ASSERT(bb != 0);
   CLG_ASSERT(cxt != 0);

   return ((Addr)bb + (Addr)cxt) % size;
}
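/* Worked example (hypothetical pointer values): for a BB struct at
 * 0x1000 and a Context at 0x2000 with the initial 10437 buckets, the
 * index is (0x1000 + 0x2000) % 10437 == 12288 % 10437 == 1851. */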


/* Look up a BBCC in the hash.
 */
static
BBCC* lookup_bbcc(BB* bb, Context* cxt)
{
   BBCC* bbcc = bb->last_bbcc;
   UInt  idx;

   /* check LRU */
   if (bbcc->cxt == cxt) {
       if (!CLG_(clo).separate_threads) {
           /* if we do not dump threads separately, the tid does not have
            * to match */
           return bbcc;
       }
       if (bbcc->tid == CLG_(current_tid)) return bbcc;
   }

   CLG_(stat).bbcc_lru_misses++;

   idx = bbcc_hash_idx(bb, cxt, current_bbccs.size);
   bbcc = current_bbccs.table[idx];
   while (bbcc &&
          (bb      != bbcc->bb ||
           cxt     != bbcc->cxt)) {
       bbcc = bbcc->next;
   }

   CLG_DEBUG(2,"  lookup_bbcc(BB %#lx, Cxt %u, fn '%s'): %p (tid %u)\n",
             bb_addr(bb), cxt->base_number, cxt->fn[0]->name,
             bbcc, bbcc ? bbcc->tid : 0);

   CLG_DEBUGIF(2)
     if (bbcc) CLG_(print_bbcc)(-2, bbcc);

   return bbcc;
}


/* Double the size of the BBCC hash table ((BB, Context) -> BBCC) */
static void resize_bbcc_hash(void)
{
    Int i, new_size, conflicts1 = 0, conflicts2 = 0;
    BBCC** new_table;
    UInt new_idx;
    BBCC *curr_BBCC, *next_BBCC;

    new_size = 2 * current_bbccs.size + 3;
    new_table = (BBCC**) CLG_MALLOC("cl.bbcc.rbh.1",
                                    new_size * sizeof(BBCC*));

    for (i = 0; i < new_size; i++)
      new_table[i] = NULL;

    for (i = 0; i < current_bbccs.size; i++) {
        if (current_bbccs.table[i] == NULL) continue;

        curr_BBCC = current_bbccs.table[i];
        while (NULL != curr_BBCC) {
            next_BBCC = curr_BBCC->next;

            new_idx = bbcc_hash_idx(curr_BBCC->bb,
                                    curr_BBCC->cxt,
                                    new_size);

            curr_BBCC->next = new_table[new_idx];
            new_table[new_idx] = curr_BBCC;
            if (curr_BBCC->next) {
                conflicts1++;
                if (curr_BBCC->next->next)
                    conflicts2++;
            }

            curr_BBCC = next_BBCC;
        }
    }

    VG_(free)(current_bbccs.table);


    CLG_DEBUG(0,"Resize BBCC Hash: %u => %d (entries %u, conflicts %d/%d)\n",
              current_bbccs.size, new_size,
              current_bbccs.entries, conflicts1, conflicts2);

    current_bbccs.size = new_size;
    current_bbccs.table = new_table;
    CLG_(stat).bbcc_hash_resizes++;
}
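/* Worked example of the growth rule above: starting from the initial
 * 10437 buckets, new_size = 2*size + 3 yields 20877, 41757, 83517, ...
 * The result is always odd, which keeps the simple modulo hash from
 * mapping even pointer sums onto even buckets only. */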


static __inline__
BBCC** new_recursion(int size)
{
    BBCC** bbccs;
    int i;

    bbccs = (BBCC**) CLG_MALLOC("cl.bbcc.nr.1", sizeof(BBCC*) * size);
    for (i = 0; i < size; i++)
        bbccs[i] = 0;

    CLG_DEBUG(3,"  new_recursion(size %d): %p\n", size, bbccs);

    return bbccs;
}


/*
 * Allocate a new BBCC
 *
 * Uninitialized:
 * cxt, rec_index, rec_array, next_bbcc, next1, next2
 */
static __inline__
BBCC* new_bbcc(BB* bb)
{
   BBCC* bbcc;
   Int i;

   /* We need cjmp_count+1 JmpData structs:
    * the last is for the unconditional jump/call/ret at the end of the BB
    */
   bbcc = (BBCC*)CLG_MALLOC("cl.bbcc.nb.1",
                            sizeof(BBCC) +
                            (bb->cjmp_count+1) * sizeof(JmpData));
   bbcc->bb  = bb;
   bbcc->tid = CLG_(current_tid);

   bbcc->ret_counter = 0;
   bbcc->skipped = 0;
   bbcc->cost = CLG_(get_costarray)(bb->cost_count);
   for (i = 0; i < bb->cost_count; i++)
     bbcc->cost[i] = 0;
   for (i = 0; i <= bb->cjmp_count; i++) {
       bbcc->jmp[i].ecounter = 0;
       bbcc->jmp[i].jcc_list = 0;
   }
   bbcc->ecounter_sum = 0;

   /* Init pointer caches (LRU) */
   bbcc->lru_next_bbcc = 0;
   bbcc->lru_from_jcc  = 0;
   bbcc->lru_to_jcc    = 0;

   CLG_(stat).distinct_bbccs++;

   CLG_DEBUG(3, "  new_bbcc(BB %#lx): %p (now %d)\n",
             bb_addr(bb), bbcc, CLG_(stat).distinct_bbccs);

   return bbcc;
}
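/* Layout sketch for the single block malloc'ed above, for a BB with
 * two conditional jumps (cjmp_count == 2):
 *
 *   [ BBCC fields ... | jmp[0] | jmp[1] | jmp[2] ]
 *                       cond 1   cond 2   final jump/call/ret
 *
 * i.e. the JmpData array lives inline as a trailing array of the BBCC,
 * so one allocation covers both. */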


/**
 * Insert a new BBCC into the hashes.
 * The BBCC-specific items used for the hash key must already be set:
 *  fn     : current function
 *  tid    : current thread ID
 *  from   : position where the current function was called from
 *
 * The recursion level does not need to be set, as it is not part of
 * the hash key: only BBCCs with recursion level 0 are in the hashes.
 */
static
void insert_bbcc_into_hash(BBCC* bbcc)
{
    UInt idx;

    CLG_ASSERT(bbcc->cxt != 0);

    CLG_DEBUG(3,"+ insert_bbcc_into_hash(BB %#lx, fn '%s')\n",
              bb_addr(bbcc->bb), bbcc->cxt->fn[0]->name);

    /* check fill degree of hash and resize if needed (>90%) */
    current_bbccs.entries++;
    if (100 * current_bbccs.entries / current_bbccs.size > 90)
        resize_bbcc_hash();

    idx = bbcc_hash_idx(bbcc->bb, bbcc->cxt, current_bbccs.size);
    bbcc->next = current_bbccs.table[idx];
    current_bbccs.table[idx] = bbcc;

    CLG_DEBUG(3,"- insert_bbcc_into_hash: %u entries\n",
              current_bbccs.entries);
}
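/* Worked example of the fill check above: with the initial 10437
 * buckets, the integer expression 100*entries/size first exceeds 90 at
 * entries == 9498 (100*9498/10437 == 91), so due to integer division
 * the resize actually triggers at about 91% fill. */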

/* The string is returned in a dynamically allocated buffer. The caller
   is responsible for freeing it. */
static HChar* mangled_cxt(const Context* cxt, Int rec_index)
{
    Int i, p;

    if (!cxt) return VG_(strdup)("cl.bbcc.mcxt", "(no context)");

    /* Overestimate the number of bytes we need to hold the string. */
    SizeT need = 20;   // rec_index + nul-terminator
    for (i = 0; i < cxt->size; ++i)
       need += VG_(strlen)(cxt->fn[i]->name) + 1;   // 1 for leading '

    HChar *mangled = CLG_MALLOC("cl.bbcc.mcxt", need);
    p = VG_(sprintf)(mangled, "%s", cxt->fn[0]->name);
    if (rec_index > 0)
        p += VG_(sprintf)(mangled+p, "'%d", rec_index + 1);
    for (i = 1; i < cxt->size; i++)
        p += VG_(sprintf)(mangled+p, "'%s", cxt->fn[i]->name);

    return mangled;
}
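/* Example output (illustrative): for a context with fn[] = {bar, foo,
 * main} and rec_index 1, the function yields "bar'2'foo'main" - the
 * current function first, then an optional recursion suffix, then the
 * caller chain, all separated by single quotes. */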


/* Create a new BBCC as a copy of an existing one,
 * but with costs set to 0 and jcc chains empty.
 *
 * This is needed when a BB is executed in a context other than the
 * one current when the BB was instrumented.
 *
 * Use cases:
 *  rec_index == 0: clone from a BBCC with differing tid/cxt
 *                  and insert into the hashes
 *  rec_index > 0 : clone from a BBCC with the same tid/cxt and
 *                  rec_index 0; do not insert into the hashes
 */
static BBCC* clone_bbcc(BBCC* orig, Context* cxt, Int rec_index)
{
    BBCC* bbcc;

    CLG_DEBUG(3,"+ clone_bbcc(BB %#lx, rec %d, fn %s)\n",
              bb_addr(orig->bb), rec_index, cxt->fn[0]->name);

    bbcc = new_bbcc(orig->bb);

    if (rec_index == 0) {

      /* hash insertion is only allowed if tid or cxt is different */
      CLG_ASSERT((orig->tid != CLG_(current_tid)) ||
                 (orig->cxt != cxt));

      bbcc->rec_index = 0;
      bbcc->cxt = cxt;
      bbcc->rec_array = new_recursion(cxt->fn[0]->separate_recursions);
      bbcc->rec_array[0] = bbcc;

      insert_bbcc_into_hash(bbcc);
    }
    else {
      if (CLG_(clo).separate_threads)
        CLG_ASSERT(orig->tid == CLG_(current_tid));

      CLG_ASSERT(orig->cxt == cxt);
      CLG_ASSERT(orig->rec_array);
      CLG_ASSERT(cxt->fn[0]->separate_recursions > rec_index);
      CLG_ASSERT(orig->rec_array[rec_index] == 0);

      /* the new BBCC will only differ in its recursion level */
      bbcc->rec_index = rec_index;
      bbcc->cxt = cxt;
      bbcc->rec_array = orig->rec_array;
      bbcc->rec_array[rec_index] = bbcc;
    }

    /* update list of BBCCs for same BB */
    bbcc->next_bbcc = orig->bb->bbcc_list;
    orig->bb->bbcc_list = bbcc;


    CLG_DEBUGIF(3)
      CLG_(print_bbcc)(-2, bbcc);

    HChar *mangled_orig = mangled_cxt(orig->cxt, orig->rec_index);
    HChar *mangled_bbcc = mangled_cxt(bbcc->cxt, bbcc->rec_index);
    CLG_DEBUG(2,"- clone_BBCC(%p, %d) for BB %#lx\n"
                "   orig %s\n"
                "   new  %s\n",
              orig, rec_index, bb_addr(orig->bb),
              mangled_orig,
              mangled_bbcc);
    CLG_FREE(mangled_orig);
    CLG_FREE(mangled_bbcc);

    CLG_(stat).bbcc_clones++;

    return bbcc;
}



/* Get a pointer to the cost centre structure for a given basic block.
 * If none exists yet, a new one is created and linked into the BB's
 * bbcc_list; it is not inserted into the BBCC hash here, as its
 * context is still unset (that happens later in CLG_(setup_bbcc)).
 */
BBCC* CLG_(get_bbcc)(BB* bb)
{
   BBCC* bbcc;

   CLG_DEBUG(3, "+ get_bbcc(BB %#lx)\n", bb_addr(bb));

   bbcc = bb->bbcc_list;

   if (!bbcc) {
     bbcc = new_bbcc(bb);

     /* initialize BBCC */
     bbcc->cxt       = 0;
     bbcc->rec_array = 0;
     bbcc->rec_index = 0;

     bbcc->next_bbcc = bb->bbcc_list;
     bb->bbcc_list = bbcc;
     bb->last_bbcc = bbcc;

     CLG_DEBUGIF(3)
       CLG_(print_bbcc)(-2, bbcc);
   }

   CLG_DEBUG(3, "- get_bbcc(BB %#lx): BBCC %p\n",
             bb_addr(bb), bbcc);

   return bbcc;
}


/* Callgrind manages its own call stack for each thread.
 * When leaving a function, an underflow can happen if
 * Callgrind's tracing was switched on in the middle of
 * a run, i.e. if Callgrind was not able to trace the
 * call instruction.
 * This function tries to reconstruct the original call.
 * As we know the return address (the address following
 * the CALL instruction), we can detect the function
 * we return to, but the original call site is unknown.
 * We assume a call site at <return address> - 1.
 * (TODO: other heuristic: look up info of instrumented BBs.)
 */
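/* Illustration (hypothetical addresses): tracing starts while inside
 * g(), whose call from f() at 0x4005a0 was never seen. When g()
 * returns to 0x4005a5, handleUnderflow() fabricates a source BB at
 * 0x4005a4 (<return address> - 1), so g's cost still gets attributed
 * to a caller inside f(). */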
static void handleUnderflow(BB* bb)
{
  /* RET at top of call stack */
  BBCC* source_bbcc;
  BB* source_bb;
  Bool seen_before;
  fn_node* caller;
  int fn_number;
  unsigned *pactive;
  call_entry* call_entry_up;

  CLG_DEBUG(1,"  Callstack underflow !\n");

  /* we emulate an old call from the function we return to
   * by using (<return address> - 1) */
  source_bb = CLG_(get_bb)(bb_addr(bb)-1, 0, &seen_before);
  source_bbcc = CLG_(get_bbcc)(source_bb);

  /* seen_before can be true on a RET from a signal handler */
  if (!seen_before) {
    source_bbcc->ecounter_sum = CLG_(current_state).collect ? 1 : 0;
  }
  else if (CLG_(current_state).collect)
    source_bbcc->ecounter_sum++;

  /* Force a new top context, will be set active by push_cxt() */
  CLG_(current_fn_stack).top--;
  CLG_(current_state).cxt = 0;
  caller = CLG_(get_fn_node)(bb);
  CLG_(push_cxt)( caller );

  if (!seen_before) {
    /* set rec array for source BBCC: this is at rec level 1 */
    source_bbcc->rec_array = new_recursion(caller->separate_recursions);
    source_bbcc->rec_array[0] = source_bbcc;

    CLG_ASSERT(source_bbcc->cxt == 0);
    source_bbcc->cxt = CLG_(current_state).cxt;
    insert_bbcc_into_hash(source_bbcc);
  }
  CLG_ASSERT(CLG_(current_state).bbcc);

  /* correct active counts */
  fn_number = CLG_(current_state).bbcc->cxt->fn[0]->number;
  pactive = CLG_(get_fn_entry)(fn_number);
  (*pactive)--;

  /* This assertion is not correct for reentrant
   * signal handlers */
  /* CLG_ASSERT(*pactive == 0); */

  CLG_(current_state).nonskipped = 0; /* we didn't skip this function */
  /* back to current context */
  CLG_(push_cxt)( CLG_(current_state).bbcc->cxt->fn[0] );
  CLG_(push_call_stack)(source_bbcc, 0, CLG_(current_state).bbcc,
                        (Addr)-1, False);
  call_entry_up =
    &(CLG_(current_call_stack).entry[CLG_(current_call_stack).sp -1]);
  /* assume this call has been ongoing since the last dump, or,
   * for a signal handler, since its invocation */
  if (CLG_(current_state).sig == 0)
    CLG_(copy_cost)( CLG_(sets).full, call_entry_up->enter_cost,
                     CLG_(get_current_thread)()->lastdump_cost );
  else
    CLG_(zero_cost)( CLG_(sets).full, call_entry_up->enter_cost );
}


/*
 * Helper function called at the start of each instrumented BB to set up
 * the pointer to the costs for the current thread/context/recursion level
 */

VG_REGPARM(1)
void CLG_(setup_bbcc)(BB* bb)
{
  BBCC *bbcc, *last_bbcc;
  Bool  call_emulation = False, delayed_push = False, skip = False;
  Addr sp;
  BB* last_bb;
  ThreadId tid;
  ClgJumpKind jmpkind;
  Bool isConditionalJump;
  Int passed = 0, csp;
  Bool ret_without_call = False;
  Int popcount_on_return = 1;

  CLG_DEBUG(3,"+ setup_bbcc(BB %#lx)\n", bb_addr(bb));

  /* This is needed because thread switches cannot reliably be tracked
   * with the callback CLG_(run_thread) alone: we otherwise have no way
   * to get the thread ID after a signal handler returns.
   * This could be removed again if that bug is fixed in Valgrind.
   * This is in the hot path, but hopefully not too costly.
   */
  tid = VG_(get_running_tid)();
#if 1
  /* CLG_(switch_thread) is a no-op when tid is equal to CLG_(current_tid).
   * As this is on the hot path, we only call CLG_(switch_thread)(tid)
   * if tid differs from CLG_(current_tid).
   */
  if (UNLIKELY(tid != CLG_(current_tid)))
     CLG_(switch_thread)(tid);
#else
  CLG_ASSERT(VG_(get_running_tid)() == CLG_(current_tid));
#endif

  sp = VG_(get_SP)(tid);
  last_bbcc = CLG_(current_state).bbcc;
  last_bb = last_bbcc ? last_bbcc->bb : 0;

  if (last_bb) {
      passed = CLG_(current_state).jmps_passed;
      CLG_ASSERT(passed <= last_bb->cjmp_count);
      jmpkind = last_bb->jmp[passed].jmpkind;
      isConditionalJump = (passed < last_bb->cjmp_count);

      if (CLG_(current_state).collect) {
        if (!CLG_(current_state).nonskipped) {
          last_bbcc->ecounter_sum++;
          last_bbcc->jmp[passed].ecounter++;
          if (!CLG_(clo).simulate_cache) {
              /* update Ir cost */
              UInt instr_count = last_bb->jmp[passed].instr+1;
              CLG_(current_state).cost[ fullOffset(EG_IR) ] += instr_count;
          }
        }
        else {
          /* do not increment the exe counter of BBs in skipped functions,
           * as it would fool the dumping code */
          if (!CLG_(clo).simulate_cache) {
              /* update Ir cost */
              UInt instr_count = last_bb->jmp[passed].instr+1;
              CLG_(current_state).cost[ fullOffset(EG_IR) ] += instr_count;
              CLG_(current_state).nonskipped->skipped[ fullOffset(EG_IR) ]
                += instr_count;
          }
        }
      }

      CLG_DEBUGIF(4) {
          CLG_(print_execstate)(-2, &CLG_(current_state) );
          CLG_(print_bbcc_cost)(-2, last_bbcc);
      }
  }
  else {
      jmpkind = jk_None;
      isConditionalJump = False;
  }

  /* Manipulate JmpKind if needed, only using BB-specific info */

  csp = CLG_(current_call_stack).sp;

  /* A return not matching the top call in our call stack is a jump */
  if ( (jmpkind == jk_Return) && (csp > 0)) {
      Int csp_up = csp-1;
      call_entry* top_ce = &(CLG_(current_call_stack).entry[csp_up]);

      /* We have a real return if
       * - the stack pointer (SP) left the current stack frame, or
       * - SP has the same value as when reaching the current function
       *   and the address of this BB is the return address of the last
       *   call (we even allow leaving multiple frames if the SP stays
       *   the same and we find a matching return address)
       * The latter condition is needed because on PPC, SP can stay
       * the same over CALL=b(c)l / RET=b(c)lr boundaries
       */
      if (sp < top_ce->sp) popcount_on_return = 0;
      else if (top_ce->sp == sp) {
          while(1) {
              if (top_ce->ret_addr == bb_addr(bb)) break;
              if (csp_up > 0) {
                  csp_up--;
                  top_ce = &(CLG_(current_call_stack).entry[csp_up]);
                  if (top_ce->sp == sp) {
                      popcount_on_return++;
                      continue;
                  }
              }
              popcount_on_return = 0;
              break;
          }
      }
      if (popcount_on_return == 0) {
          jmpkind = jk_Jump;
          ret_without_call = True;
      }
  }
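
  /* Example of the walk above (PPC-style, hypothetical values): with
   * call stack entries
   *   entry[1]: sp == 0x1000, ret_addr == B   <- top
   *   entry[0]: sp == 0x1000, ret_addr == A
   * a return to address A while SP == 0x1000 first misses B, steps
   * down one entry, and ends with popcount_on_return == 2, so both
   * frames get popped at once. */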

  /* Should this jump be converted to a call or pop/call? */
  if (( jmpkind != jk_Return) &&
      ( jmpkind != jk_Call) && last_bb) {

    /* We simulate a JMP/Cont to be a CALL if
     * - the jump goes to another ELF object or section kind, or
     * - the jump is to the first instruction of a function (tail recursion)
     */
    if (ret_without_call ||
        /* This is for detection of optimized tail recursion.
         * On PPC, this is only detected as a call when going to another
         * function. The problem is that on PPC it can go wrong
         * more easily (no stack frame setup needed)
         */
#if defined(VGA_ppc32)
        (bb->is_entry && (last_bb->fn != bb->fn)) ||
#else
        bb->is_entry ||
#endif
        (last_bb->sect_kind != bb->sect_kind) ||
        (last_bb->obj->number != bb->obj->number)) {

        CLG_DEBUG(1,"     JMP: %s[%s] to %s[%s]%s!\n",
                  last_bb->fn->name, last_bb->obj->name,
                  bb->fn->name, bb->obj->name,
                  ret_without_call?" (RET w/o CALL)":"");

        if (CLG_(get_fn_node)(last_bb)->pop_on_jump && (csp > 0)) {

            call_entry* top_ce = &(CLG_(current_call_stack).entry[csp-1]);

            if (top_ce->jcc) {

                CLG_DEBUG(1,"     Pop on Jump!\n");

                /* change source for delayed push */
                CLG_(current_state).bbcc = top_ce->jcc->from;
                sp = top_ce->sp;
                passed = top_ce->jcc->jmp;
                CLG_(pop_call_stack)();
            }
            else {
                CLG_ASSERT(CLG_(current_state).nonskipped != 0);
            }
        }

        jmpkind = jk_Call;
        call_emulation = True;
    }
  }

  if (jmpkind == jk_Call)
    skip = CLG_(get_fn_node)(bb)->skip;

  CLG_DEBUGIF(1) {
    if (isConditionalJump)
      VG_(printf)("Cond-");
    switch(jmpkind) {
    case jk_None:   VG_(printf)("Fall-through"); break;
    case jk_Jump:   VG_(printf)("Jump"); break;
    case jk_Call:   VG_(printf)("Call"); break;
    case jk_Return: VG_(printf)("Return"); break;
    default:        tl_assert(0);
    }
    VG_(printf)(" %08lx -> %08lx, SP %08lx\n",
                last_bb ? bb_jmpaddr(last_bb) : 0,
                bb_addr(bb), sp);
  }

  /* Handle CALL/RET and update context to get correct BBCC */

  if (jmpkind == jk_Return) {

    if ((csp == 0) ||
        ((CLG_(current_fn_stack).top > CLG_(current_fn_stack).bottom) &&
         ( *(CLG_(current_fn_stack).top-1) == 0)) ) {

      /* On an empty call stack or at a signal separation marker,
       * a RETURN generates a call stack underflow.
       */
      handleUnderflow(bb);
      CLG_(pop_call_stack)();
    }
    else {
        CLG_ASSERT(popcount_on_return > 0);
        CLG_(unwind_call_stack)(sp, popcount_on_return);
    }
  }
  else {
    Int unwind_count = CLG_(unwind_call_stack)(sp, 0);
    if (unwind_count > 0) {
      /* if unwinding was done, this actually is a return */
      jmpkind = jk_Return;
    }

    if (jmpkind == jk_Call) {
      delayed_push = True;

      csp = CLG_(current_call_stack).sp;
      if (call_emulation && csp > 0)
        sp = CLG_(current_call_stack).entry[csp-1].sp;

    }
  }

  /* Change to a new context if needed, taking delayed_push into account */
  if ((delayed_push && !skip) || (CLG_(current_state).cxt == 0)) {
    CLG_(push_cxt)(CLG_(get_fn_node)(bb));
  }
  CLG_ASSERT(CLG_(current_fn_stack).top > CLG_(current_fn_stack).bottom);

  /* If there is a freshly instrumented BBCC, assign the current context */
  bbcc = CLG_(get_bbcc)(bb);
  if (bbcc->cxt == 0) {
    CLG_ASSERT(bbcc->rec_array == 0);

    bbcc->cxt = CLG_(current_state).cxt;
    bbcc->rec_array =
      new_recursion((*CLG_(current_fn_stack).top)->separate_recursions);
    bbcc->rec_array[0] = bbcc;

    insert_bbcc_into_hash(bbcc);
  }
  else {
    /* get BBCC with current context */

    /* first check LRU of last bbcc executed */

    if (last_bbcc) {
      bbcc = last_bbcc->lru_next_bbcc;
      if (bbcc &&
          ((bbcc->bb != bb) ||
           (bbcc->cxt != CLG_(current_state).cxt)))
        bbcc = 0;
    }
    else
      bbcc = 0;

    if (!bbcc)
      bbcc = lookup_bbcc(bb, CLG_(current_state).cxt);
    if (!bbcc)
      bbcc = clone_bbcc(bb->bbcc_list, CLG_(current_state).cxt, 0);

    bb->last_bbcc = bbcc;
  }

  /* save for fast lookup */
  if (last_bbcc)
    last_bbcc->lru_next_bbcc = bbcc;

  if ((*CLG_(current_fn_stack).top)->separate_recursions > 1) {
    UInt level, idx;
    fn_node* top = *(CLG_(current_fn_stack).top);

    level = *CLG_(get_fn_entry)(top->number);

    if (delayed_push && !skip) {
      if (CLG_(clo).skip_direct_recursion) {
        /* a call was detected, which means that the source BB != 0 */
        CLG_ASSERT(CLG_(current_state).bbcc != 0);
        /* only increment the rec. level if called from a different function */
        if (CLG_(current_state).bbcc->cxt->fn[0] != bbcc->cxt->fn[0])
          level++;
      }
      else level++;
    }
    if (level > top->separate_recursions)
      level = top->separate_recursions;

    if (level == 0) {
      /* can only happen if instrumentation was just switched on */
      level = 1;
      *CLG_(get_fn_entry)(top->number) = 1;
    }

    idx = level - 1;
    if (bbcc->rec_array[idx])
      bbcc = bbcc->rec_array[idx];
    else
      bbcc = clone_bbcc(bbcc, CLG_(current_state).cxt, idx);

    CLG_ASSERT(bbcc->rec_array[bbcc->rec_index] == bbcc);
  }
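
  /* Example of the clamping above: with separate_recursions == 2 for
   * the current function, level is clamped to 2 and idx to 1, so the
   * first activation uses rec_array[0] and all deeper recursion levels
   * share rec_array[1]. */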

  if (delayed_push) {
    if (!skip && CLG_(current_state).nonskipped) {
      /* a call from skipped to nonskipped */
      CLG_(current_state).bbcc = CLG_(current_state).nonskipped;
      /* FIXME: take the real passed count from the shadow stack */
      passed = CLG_(current_state).bbcc->bb->cjmp_count;
    }
    CLG_(push_call_stack)(CLG_(current_state).bbcc, passed,
                          bbcc, sp, skip);
  }

  if (CLG_(clo).collect_jumps && (jmpkind == jk_Jump)) {

    /* Handle conditional jumps that were followed, i.e. trace arcs.
     * This uses JCC structures, too */

    jCC* jcc = CLG_(get_jcc)(last_bbcc, passed, bbcc);
    CLG_ASSERT(jcc != 0);
    // Change from default, and check if already changed
    if (jcc->jmpkind == jk_Call)
      jcc->jmpkind = isConditionalJump ? jk_CondJump : jk_Jump;
    else {
        // FIXME: Why can this fail?
        // CLG_ASSERT(jcc->jmpkind == jmpkind);
    }

    jcc->call_counter++;
    if (isConditionalJump)
      CLG_(stat).jcnd_counter++;
    else
      CLG_(stat).jump_counter++;
  }

  CLG_(current_state).bbcc = bbcc;
  /* Even though this will be set in instrumented code directly before
   * side exits, it needs to be set to 0 here in case an exception
   * happens in the first instructions of the BB */
  CLG_(current_state).jmps_passed = 0;
  // needed for log_* handlers called in this BB
  CLG_(bb_base)   = bb->obj->offset + bb->offset;
  CLG_(cost_base) = bbcc->cost;

  CLG_DEBUGIF(1) {
    VG_(printf)("     ");
    CLG_(print_bbcc_fn)(bbcc);
    VG_(printf)("\n");
  }

  CLG_DEBUG(3,"- setup_bbcc (BB %#lx): Cost %p (Len %u), Instrs %u (Len %u)\n",
            bb_addr(bb), bbcc->cost, bb->cost_count,
            bb->instr_count, bb->instr_len);
  CLG_DEBUGIF(3)
    CLG_(print_cxt)(-8, CLG_(current_state).cxt, bbcc->rec_index);
  CLG_DEBUG(3,"\n");

  CLG_(stat).bb_executions++;
}