
/*--------------------------------------------------------------------*/
/*--- MemCheck: Maintain bitmaps of memory, tracking the           ---*/
/*--- accessibility (A) and validity (V) status of each byte.      ---*/
/*---                                                    mc_main.c ---*/
/*--------------------------------------------------------------------*/

/*
   This file is part of MemCheck, a heavyweight Valgrind tool for
   detecting memory errors.

   Copyright (C) 2000-2013 Julian Seward
      jseward@acm.org

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
   02111-1307, USA.

   The GNU General Public License is contained in the file COPYING.
*/

#include "pub_tool_basics.h"
#include "pub_tool_aspacemgr.h"
#include "pub_tool_gdbserver.h"
#include "pub_tool_poolalloc.h"
#include "pub_tool_hashtable.h"     // For mc_include.h
#include "pub_tool_libcbase.h"
#include "pub_tool_libcassert.h"
#include "pub_tool_libcprint.h"
#include "pub_tool_machine.h"
#include "pub_tool_mallocfree.h"
#include "pub_tool_options.h"
#include "pub_tool_oset.h"
#include "pub_tool_rangemap.h"
#include "pub_tool_replacemalloc.h"
#include "pub_tool_tooliface.h"
#include "pub_tool_threadstate.h"

#include "mc_include.h"
#include "memcheck.h"   /* for client requests */


/* Set to 1 to do a little more sanity checking */
#define VG_DEBUG_MEMORY 0

#define DEBUG(fmt, args...) //VG_(printf)(fmt, ## args)

static void ocache_sarp_Set_Origins ( Addr, UWord, UInt ); /* fwds */
static void ocache_sarp_Clear_Origins ( Addr, UWord ); /* fwds */


/*------------------------------------------------------------*/
/*--- Fast-case knobs                                      ---*/
/*------------------------------------------------------------*/

// Comment these out to disable the fast cases (don't just set them to zero).

#define PERF_FAST_LOADV    1
#define PERF_FAST_STOREV   1

#define PERF_FAST_SARP     1

#define PERF_FAST_STACK    1
#define PERF_FAST_STACK2   1

/* Change this to 1 to enable assertions on origin tracking cache fast
   paths */
#define OC_ENABLE_ASSERTIONS 0


/*------------------------------------------------------------*/
/*--- Comments on the origin tracking implementation       ---*/
/*------------------------------------------------------------*/

/* See detailed comment entitled
   AN OVERVIEW OF THE ORIGIN TRACKING IMPLEMENTATION
   which is contained further on in this file. */


/*------------------------------------------------------------*/
/*--- V bits and A bits                                    ---*/
/*------------------------------------------------------------*/

/* Conceptually, every byte value has 8 V bits, which track whether Memcheck
   thinks the corresponding value bit is defined.  And every memory byte
   has an A bit, which tracks whether Memcheck thinks the program can access
   it safely (ie. it's mapped, and has at least one of the RWX permission bits
   set).  So every N-bit register is shadowed with N V bits, and every memory
   byte is shadowed with 8 V bits and one A bit.

   In the implementation, we use two forms of compression (compressed V bits
   and distinguished secondary maps) to avoid the 9-bit-per-byte overhead
   for memory.

   Memcheck also tracks extra information about each heap block that is
   allocated, for detecting memory leaks and other purposes.
*/
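
/* In concrete terms: a naive implementation would need 9 shadow bits per
   byte of memory (8 V bits plus 1 A bit, ie. more than 100% overhead),
   whereas the compressed scheme described below needs only 2 bits per
   byte (25%), plus a sparse side table for the rare partially-defined
   bytes. */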

/*------------------------------------------------------------*/
/*--- Basic A/V bitmap representation.                     ---*/
/*------------------------------------------------------------*/

/* All reads and writes are checked against a memory map (a.k.a. shadow
   memory), which records the state of all memory in the process.

   On 32-bit machines the memory map is organised as follows.
   The top 16 bits of an address are used to index into a top-level
   map table, containing 65536 entries.  Each entry is a pointer to a
   second-level map, which records the accessibility and validity
   permissions for the 65536 bytes indexed by the lower 16 bits of the
   address.  Each byte is represented by two bits (details are below).  So
   each second-level map contains 16384 bytes.  This two-level arrangement
   conveniently divides the 4G address space into 64k lumps, each of size
   64k bytes.

   All entries in the primary (top-level) map must point to a valid
   secondary (second-level) map.  Since many of the 64kB chunks will
   have the same status for every bit -- ie. noaccess (for unused
   address space) or entirely addressable and defined (for code segments) --
   there are three distinguished secondary maps, which indicate 'noaccess',
   'undefined' and 'defined'.  For these uniform 64kB chunks, the primary
   map entry points to the relevant distinguished map.  In practice,
   typically more than half of the addressable memory is represented with
   the 'undefined' or 'defined' distinguished secondary map, so it gives a
   good saving.  It also lets us set the V+A bits of large address regions
   quickly in set_address_range_perms().

   On 64-bit machines it's more complicated.  If we followed the same basic
   scheme we'd have a four-level table which would require too many memory
   accesses.  So instead the top-level map table has 2^20 entries (indexed
   using bits 16..35 of the address);  this covers the bottom 64GB.  Any
   accesses above 64GB are handled with a slow, sparse auxiliary table.
   Valgrind's address space manager tries very hard to keep things below
   this 64GB barrier so that performance doesn't suffer too much.

   Note that this file has a lot of different functions for reading and
   writing shadow memory.  Only a couple are strictly necessary (eg.
   get_vabits2 and set_vabits2), most are just specialised for specific
   common cases to improve performance.

   Aside: the V+A bits are less precise than they could be -- we have no way
   of marking memory as read-only.  It would be great if we could add an
   extra state VA_BITSn_READONLY.  But then we'd have 5 different states,
   which requires 2.3 bits to hold, and there's no way to do that elegantly
   -- we'd have to double up to 4 bits of metadata per byte, which doesn't
   seem worth it.
*/
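
/* Worked example of the 32-bit layout described above (illustrative
   arithmetic only): for an address a = 0x12345678,
     - primary map index        = a >> 16     = 0x1234
     - offset within the SecMap = a & 0xFFFF  = 0x5678
   Each secondary map covers 65536 bytes at 2 bits per byte, hence
   65536 * 2 / 8 = 16384 bytes of shadow storage (SM_CHUNKS below), and
   the 65536-entry primary map covers the whole 4GB address space. */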

/* --------------- Basic configuration --------------- */

/* Only change this.  N_PRIMARY_MAP *must* be a power of 2. */

#if VG_WORDSIZE == 4

/* cover the entire address space */
#  define N_PRIMARY_BITS  16

#else

/* Just handle the first 64G fast and the rest via auxiliary
   primaries.  If you change this, Memcheck will assert at startup.
   See the definition of UNALIGNED_OR_HIGH for extensive comments. */
#  define N_PRIMARY_BITS  20

#endif


/* Do not change this. */
#define N_PRIMARY_MAP  ( ((UWord)1) << N_PRIMARY_BITS)

/* Do not change this. */
#define MAX_PRIMARY_ADDRESS (Addr)((((Addr)65536) * N_PRIMARY_MAP)-1)


/* --------------- Secondary maps --------------- */

// Each byte of memory conceptually has an A bit, which indicates its
// addressability, and 8 V bits, which indicate its definedness.
//
// But because very few bytes are partially defined, we can use a nice
// compression scheme to reduce the size of shadow memory.  Each byte of
// memory has 2 bits which indicate its state (ie. V+A bits):
//
//   00:  noaccess    (unaddressable but treated as fully defined)
//   01:  undefined   (addressable and fully undefined)
//   10:  defined     (addressable and fully defined)
//   11:  partdefined (addressable and partially defined)
//
// In the "partdefined" case, we use a secondary table to store the V bits.
// Each entry in the secondary-V-bits table maps a byte address to its 8 V
// bits.
//
// We store the compressed V+A bits in 8-bit chunks, ie. the V+A bits for
// four bytes (32 bits) of memory are in each chunk.  Hence the name
// "vabits8".  This lets us get the V+A bits for four bytes at a time
// easily (without having to do any shifting and/or masking), and that is a
// very common operation.  (Note that although each vabits8 chunk
// is 8 bits in size, it represents 32 bits of memory.)
//
// The representation is "inverse" little-endian... each 4 bytes of
// memory is represented by a 1 byte value, where:
//
// - the status of byte (a+0) is held in bits [1..0]
// - the status of byte (a+1) is held in bits [3..2]
// - the status of byte (a+2) is held in bits [5..4]
// - the status of byte (a+3) is held in bits [7..6]
//
// It's "inverse" because endianness normally describes a mapping from
// value bits to memory addresses;  in this case the mapping is inverted.
// Ie. instead of particular value bits being held in certain addresses, in
// this case certain addresses are represented by particular value bits.
// See insert_vabits2_into_vabits8() for an example.
//
// But note that we don't compress the V bits stored in registers;  they
// need to be explicit to make the shadow operations possible.  Therefore
// when moving values between registers and memory we need to convert
// between the expanded in-register format and the compressed in-memory
// format.  This isn't so difficult, it just requires careful attention in a
// few places.

// These represent eight bits of memory.
#define VA_BITS2_NOACCESS     0x0      // 00b
#define VA_BITS2_UNDEFINED    0x1      // 01b
#define VA_BITS2_DEFINED      0x2      // 10b
#define VA_BITS2_PARTDEFINED  0x3      // 11b

// These represent 16 bits of memory.
#define VA_BITS4_NOACCESS     0x0      // 00_00b
#define VA_BITS4_UNDEFINED    0x5      // 01_01b
#define VA_BITS4_DEFINED      0xa      // 10_10b

// These represent 32 bits of memory.
#define VA_BITS8_NOACCESS     0x00     // 00_00_00_00b
#define VA_BITS8_UNDEFINED    0x55     // 01_01_01_01b
#define VA_BITS8_DEFINED      0xaa     // 10_10_10_10b

// These represent 64 bits of memory.
#define VA_BITS16_NOACCESS    0x0000   // 00_00_00_00b x 2
#define VA_BITS16_UNDEFINED   0x5555   // 01_01_01_01b x 2
#define VA_BITS16_DEFINED     0xaaaa   // 10_10_10_10b x 2
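
// Worked example (illustrative only): the wider constants above are just
// the 2-bit codes replicated once per byte covered.  Using the layout
// described earlier, a 4-byte group in which byte a+0 is defined, byte
// a+1 is undefined and bytes a+2/a+3 are noaccess is encoded as
//
//      (VA_BITS2_DEFINED   << 0)     // byte a+0 -> bits [1..0]
//    | (VA_BITS2_UNDEFINED << 2)     // byte a+1 -> bits [3..2]
//    | (VA_BITS2_NOACCESS  << 4)     // byte a+2 -> bits [5..4]
//    | (VA_BITS2_NOACCESS  << 6)     // byte a+3 -> bits [7..6]
//   == 0x06
//
// and a fully defined group is 10_10_10_10b == VA_BITS8_DEFINED == 0xaa.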


#define SM_CHUNKS             16384
#define SM_OFF(aaa)           (((aaa) & 0xffff) >> 2)
#define SM_OFF_16(aaa)        (((aaa) & 0xffff) >> 3)
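// For example (illustrative): for a = 0x12345678, SM_OFF(a) is
// (0x5678 >> 2) = 0x159E, the index of the vabits8 chunk covering bytes
// 0x12345678..0x1234567B, and SM_OFF_16(a) is (0x5678 >> 3) = 0x0ACF,
// the index used when the shadow is accessed as 16-bit chunks covering
// 8 bytes each.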

// Paranoia:  it's critical for performance that the requested inlining
// occurs.  So try extra hard.
#define INLINE    inline __attribute__((always_inline))

static INLINE Addr start_of_this_sm ( Addr a ) {
   return (a & (~SM_MASK));
}
static INLINE Bool is_start_of_sm ( Addr a ) {
   return (start_of_this_sm(a) == a);
}

typedef
   struct {
      UChar vabits8[SM_CHUNKS];
   }
   SecMap;

// 3 distinguished secondary maps, one for no-access, one for
// accessible but undefined, and one for accessible and defined.
// Distinguished secondaries may never be modified.
#define SM_DIST_NOACCESS   0
#define SM_DIST_UNDEFINED  1
#define SM_DIST_DEFINED    2

static SecMap sm_distinguished[3];

static INLINE Bool is_distinguished_sm ( SecMap* sm ) {
   return sm >= &sm_distinguished[0] && sm <= &sm_distinguished[2];
}

// Forward declaration
static void update_SM_counts(SecMap* oldSM, SecMap* newSM);

/* dist_sm points to one of our three distinguished secondaries.  Make
   a copy of it so that we can write to it.
*/
static SecMap* copy_for_writing ( SecMap* dist_sm )
{
   SecMap* new_sm;
   tl_assert(dist_sm == &sm_distinguished[0]
          || dist_sm == &sm_distinguished[1]
          || dist_sm == &sm_distinguished[2]);

   new_sm = VG_(am_shadow_alloc)(sizeof(SecMap));
   if (new_sm == NULL)
      VG_(out_of_memory_NORETURN)( "memcheck:allocate new SecMap",
                                   sizeof(SecMap) );
   VG_(memcpy)(new_sm, dist_sm, sizeof(SecMap));
   update_SM_counts(dist_sm, new_sm);
   return new_sm;
}

/* --------------- Stats --------------- */

static Int   n_issued_SMs      = 0;
static Int   n_deissued_SMs    = 0;
static Int   n_noaccess_SMs    = N_PRIMARY_MAP; // start with many noaccess DSMs
static Int   n_undefined_SMs   = 0;
static Int   n_defined_SMs     = 0;
static Int   n_non_DSM_SMs     = 0;
static Int   max_noaccess_SMs  = 0;
static Int   max_undefined_SMs = 0;
static Int   max_defined_SMs   = 0;
static Int   max_non_DSM_SMs   = 0;

/* # searches initiated in auxmap_L1, and # base cmps required */
static ULong n_auxmap_L1_searches  = 0;
static ULong n_auxmap_L1_cmps      = 0;
/* # of searches that missed in auxmap_L1 and therefore had to
   be handed to auxmap_L2. And the number of nodes inserted. */
static ULong n_auxmap_L2_searches  = 0;
static ULong n_auxmap_L2_nodes     = 0;

static Int   n_sanity_cheap     = 0;
static Int   n_sanity_expensive = 0;

static Int   n_secVBit_nodes   = 0;
static Int   max_secVBit_nodes = 0;

static void update_SM_counts(SecMap* oldSM, SecMap* newSM)
{
   if      (oldSM == &sm_distinguished[SM_DIST_NOACCESS ]) n_noaccess_SMs --;
   else if (oldSM == &sm_distinguished[SM_DIST_UNDEFINED]) n_undefined_SMs--;
   else if (oldSM == &sm_distinguished[SM_DIST_DEFINED  ]) n_defined_SMs  --;
   else                                                  { n_non_DSM_SMs  --;
                                                           n_deissued_SMs ++; }

   if      (newSM == &sm_distinguished[SM_DIST_NOACCESS ]) n_noaccess_SMs ++;
   else if (newSM == &sm_distinguished[SM_DIST_UNDEFINED]) n_undefined_SMs++;
   else if (newSM == &sm_distinguished[SM_DIST_DEFINED  ]) n_defined_SMs  ++;
   else                                                  { n_non_DSM_SMs  ++;
                                                           n_issued_SMs   ++; }

   if (n_noaccess_SMs  > max_noaccess_SMs ) max_noaccess_SMs  = n_noaccess_SMs;
   if (n_undefined_SMs > max_undefined_SMs) max_undefined_SMs = n_undefined_SMs;
   if (n_defined_SMs   > max_defined_SMs  ) max_defined_SMs   = n_defined_SMs;
   if (n_non_DSM_SMs   > max_non_DSM_SMs  ) max_non_DSM_SMs   = n_non_DSM_SMs;
}

/* --------------- Primary maps --------------- */

/* The main primary map.  This covers some initial part of the address
   space, addresses 0 .. (N_PRIMARY_MAP << 16)-1.  The rest of it is
   handled using the auxiliary primary map.
*/
static SecMap* primary_map[N_PRIMARY_MAP];


/* An entry in the auxiliary primary map.  base must be a 64k-aligned
   value, and sm points at the relevant secondary map.  As with the
   main primary map, the secondary may be either a real secondary, or
   one of the three distinguished secondaries.  DO NOT CHANGE THIS
   LAYOUT: the first word has to be the key for OSet fast lookups.
*/
typedef
   struct {
      Addr    base;
      SecMap* sm;
   }
   AuxMapEnt;

/* Tunable parameter: How big is the L1 queue? */
#define N_AUXMAP_L1 24

/* Tunable parameter: How far along the L1 queue to insert
   entries resulting from L2 lookups? */
#define AUXMAP_L1_INSERT_IX 12
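
/* New entries fetched from the L2 table are inserted into the middle of
   the L1 queue rather than at the front, presumably so that a burst of
   one-off lookups cannot immediately evict the genuinely hot entries in
   slots 0 .. AUXMAP_L1_INSERT_IX-1.  See maybe_find_in_auxmap() below
   for the promotion-by-swapping scheme. */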

static struct {
          Addr       base;
          AuxMapEnt* ent; // pointer to the matching auxmap_L2 node
       }
       auxmap_L1[N_AUXMAP_L1];

static OSet* auxmap_L2 = NULL;

static void init_auxmap_L1_L2 ( void )
{
   Int i;
   for (i = 0; i < N_AUXMAP_L1; i++) {
      auxmap_L1[i].base = 0;
      auxmap_L1[i].ent  = NULL;
   }

   tl_assert(0 == offsetof(AuxMapEnt,base));
   tl_assert(sizeof(Addr) == sizeof(void*));
   auxmap_L2 = VG_(OSetGen_Create)( /*keyOff*/  offsetof(AuxMapEnt,base),
                                    /*fastCmp*/ NULL,
                                    VG_(malloc), "mc.iaLL.1", VG_(free) );
}

/* Check representation invariants; if OK return NULL; else a
   descriptive bit of text.  Also return the number of
   non-distinguished secondary maps referred to from the auxiliary
   primary maps. */

static const HChar* check_auxmap_L1_L2_sanity ( Word* n_secmaps_found )
{
   Word i, j;
   /* On a 32-bit platform, the L2 and L1 tables should
      both remain empty forever.

      On a 64-bit platform:
      In the L2 table:
       all .base & 0xFFFF == 0
       all .base > MAX_PRIMARY_ADDRESS
      In the L1 table:
       all .base & 0xFFFF == 0
       all (.base > MAX_PRIMARY_ADDRESS
            .base & 0xFFFF == 0
            and .ent points to an AuxMapEnt with the same .base)
           or
           (.base == 0 and .ent == NULL)
   */
   *n_secmaps_found = 0;
   if (sizeof(void*) == 4) {
      /* 32-bit platform */
      if (VG_(OSetGen_Size)(auxmap_L2) != 0)
         return "32-bit: auxmap_L2 is non-empty";
      for (i = 0; i < N_AUXMAP_L1; i++)
         if (auxmap_L1[i].base != 0 || auxmap_L1[i].ent != NULL)
            return "32-bit: auxmap_L1 is non-empty";
   } else {
      /* 64-bit platform */
      UWord elems_seen = 0;
      AuxMapEnt *elem, *res;
      AuxMapEnt key;
      /* L2 table */
      VG_(OSetGen_ResetIter)(auxmap_L2);
      while ( (elem = VG_(OSetGen_Next)(auxmap_L2)) ) {
         elems_seen++;
         if (0 != (elem->base & (Addr)0xFFFF))
            return "64-bit: nonzero .base & 0xFFFF in auxmap_L2";
         if (elem->base <= MAX_PRIMARY_ADDRESS)
            return "64-bit: .base <= MAX_PRIMARY_ADDRESS in auxmap_L2";
         if (elem->sm == NULL)
            return "64-bit: .sm in _L2 is NULL";
         if (!is_distinguished_sm(elem->sm))
            (*n_secmaps_found)++;
      }
      if (elems_seen != n_auxmap_L2_nodes)
         return "64-bit: disagreement on number of elems in _L2";
      /* Check L1-L2 correspondence */
      for (i = 0; i < N_AUXMAP_L1; i++) {
         if (auxmap_L1[i].base == 0 && auxmap_L1[i].ent == NULL)
            continue;
         if (0 != (auxmap_L1[i].base & (Addr)0xFFFF))
            return "64-bit: nonzero .base & 0xFFFF in auxmap_L1";
         if (auxmap_L1[i].base <= MAX_PRIMARY_ADDRESS)
            return "64-bit: .base <= MAX_PRIMARY_ADDRESS in auxmap_L1";
         if (auxmap_L1[i].ent == NULL)
            return "64-bit: .ent is NULL in auxmap_L1";
         if (auxmap_L1[i].ent->base != auxmap_L1[i].base)
            return "64-bit: _L1 and _L2 bases are inconsistent";
         /* Look it up in auxmap_L2. */
         key.base = auxmap_L1[i].base;
         key.sm   = 0;
         res = VG_(OSetGen_Lookup)(auxmap_L2, &key);
         if (res == NULL)
            return "64-bit: _L1 .base not found in _L2";
         if (res != auxmap_L1[i].ent)
            return "64-bit: _L1 .ent disagrees with _L2 entry";
      }
      /* Check L1 contains no duplicates */
      for (i = 0; i < N_AUXMAP_L1; i++) {
         if (auxmap_L1[i].base == 0)
            continue;
         for (j = i+1; j < N_AUXMAP_L1; j++) {
            if (auxmap_L1[j].base == 0)
               continue;
            if (auxmap_L1[j].base == auxmap_L1[i].base)
               return "64-bit: duplicate _L1 .base entries";
         }
      }
   }
   return NULL; /* ok */
}

static void insert_into_auxmap_L1_at ( Word rank, AuxMapEnt* ent )
{
   Word i;
   tl_assert(ent);
   tl_assert(rank >= 0 && rank < N_AUXMAP_L1);
   for (i = N_AUXMAP_L1-1; i > rank; i--)
      auxmap_L1[i] = auxmap_L1[i-1];
   auxmap_L1[rank].base = ent->base;
   auxmap_L1[rank].ent  = ent;
}

static INLINE AuxMapEnt* maybe_find_in_auxmap ( Addr a )
{
   AuxMapEnt  key;
   AuxMapEnt* res;
   Word       i;

   tl_assert(a > MAX_PRIMARY_ADDRESS);
   a &= ~(Addr)0xFFFF;

   /* First search the front-cache, which is a self-organising
      list containing the most popular entries. */

   if (LIKELY(auxmap_L1[0].base == a))
      return auxmap_L1[0].ent;
   if (LIKELY(auxmap_L1[1].base == a)) {
      Addr       t_base = auxmap_L1[0].base;
      AuxMapEnt* t_ent  = auxmap_L1[0].ent;
      auxmap_L1[0].base = auxmap_L1[1].base;
      auxmap_L1[0].ent  = auxmap_L1[1].ent;
      auxmap_L1[1].base = t_base;
      auxmap_L1[1].ent  = t_ent;
      return auxmap_L1[0].ent;
   }

   n_auxmap_L1_searches++;

   for (i = 0; i < N_AUXMAP_L1; i++) {
      if (auxmap_L1[i].base == a) {
         break;
      }
   }
   tl_assert(i >= 0 && i <= N_AUXMAP_L1);

   n_auxmap_L1_cmps += (ULong)(i+1);

   if (i < N_AUXMAP_L1) {
      if (i > 0) {
         Addr       t_base = auxmap_L1[i-1].base;
         AuxMapEnt* t_ent  = auxmap_L1[i-1].ent;
         auxmap_L1[i-1].base = auxmap_L1[i-0].base;
         auxmap_L1[i-1].ent  = auxmap_L1[i-0].ent;
         auxmap_L1[i-0].base = t_base;
         auxmap_L1[i-0].ent  = t_ent;
         i--;
      }
      return auxmap_L1[i].ent;
   }

   n_auxmap_L2_searches++;

   /* First see if we already have it. */
   key.base = a;
   key.sm   = 0;

   res = VG_(OSetGen_Lookup)(auxmap_L2, &key);
   if (res)
      insert_into_auxmap_L1_at( AUXMAP_L1_INSERT_IX, res );
   return res;
}

static AuxMapEnt* find_or_alloc_in_auxmap ( Addr a )
{
   AuxMapEnt *nyu, *res;

   /* First see if we already have it. */
   res = maybe_find_in_auxmap( a );
   if (LIKELY(res))
      return res;

   /* Ok, there's no entry in the secondary map, so we'll have
      to allocate one. */
   a &= ~(Addr)0xFFFF;

   nyu = (AuxMapEnt*) VG_(OSetGen_AllocNode)( auxmap_L2, sizeof(AuxMapEnt) );
   nyu->base = a;
   nyu->sm   = &sm_distinguished[SM_DIST_NOACCESS];
   VG_(OSetGen_Insert)( auxmap_L2, nyu );
   insert_into_auxmap_L1_at( AUXMAP_L1_INSERT_IX, nyu );
   n_auxmap_L2_nodes++;
   return nyu;
}

/* --------------- SecMap fundamentals --------------- */

// In all these, 'low' means it's definitely in the main primary map,
// 'high' means it's definitely in the auxiliary table.

static INLINE SecMap** get_secmap_low_ptr ( Addr a )
{
   UWord pm_off = a >> 16;
#  if VG_DEBUG_MEMORY >= 1
   tl_assert(pm_off < N_PRIMARY_MAP);
#  endif
   return &primary_map[ pm_off ];
}

static INLINE SecMap** get_secmap_high_ptr ( Addr a )
{
   AuxMapEnt* am = find_or_alloc_in_auxmap(a);
   return &am->sm;
}

static INLINE SecMap** get_secmap_ptr ( Addr a )
{
   return ( a <= MAX_PRIMARY_ADDRESS
          ? get_secmap_low_ptr(a)
          : get_secmap_high_ptr(a));
}

static INLINE SecMap* get_secmap_for_reading_low ( Addr a )
{
   return *get_secmap_low_ptr(a);
}

static INLINE SecMap* get_secmap_for_reading_high ( Addr a )
{
   return *get_secmap_high_ptr(a);
}

static INLINE SecMap* get_secmap_for_writing_low(Addr a)
{
   SecMap** p = get_secmap_low_ptr(a);
   if (UNLIKELY(is_distinguished_sm(*p)))
      *p = copy_for_writing(*p);
   return *p;
}

static INLINE SecMap* get_secmap_for_writing_high ( Addr a )
{
   SecMap** p = get_secmap_high_ptr(a);
   if (UNLIKELY(is_distinguished_sm(*p)))
      *p = copy_for_writing(*p);
   return *p;
}

/* Produce the secmap for 'a', either from the primary map or by
   ensuring there is an entry for it in the aux primary map.  The
   secmap may be a distinguished one as the caller will only want to
   be able to read it.
*/
static INLINE SecMap* get_secmap_for_reading ( Addr a )
{
   return ( a <= MAX_PRIMARY_ADDRESS
          ? get_secmap_for_reading_low (a)
          : get_secmap_for_reading_high(a) );
}

/* Produce the secmap for 'a', either from the primary map or by
   ensuring there is an entry for it in the aux primary map.  The
   secmap may not be a distinguished one, since the caller will want
   to be able to write it.  If it is a distinguished secondary, make a
   writable copy of it, install it, and return the copy instead.  (COW
   semantics).
*/
static INLINE SecMap* get_secmap_for_writing ( Addr a )
{
   return ( a <= MAX_PRIMARY_ADDRESS
          ? get_secmap_for_writing_low (a)
          : get_secmap_for_writing_high(a) );
}

/* If 'a' has a SecMap, produce it.  Else produce NULL.  But don't
   allocate one if one doesn't already exist.  This is used by the
   leak checker.
*/
static SecMap* maybe_get_secmap_for ( Addr a )
{
   if (a <= MAX_PRIMARY_ADDRESS) {
      return get_secmap_for_reading_low(a);
   } else {
      AuxMapEnt* am = maybe_find_in_auxmap(a);
      return am ? am->sm : NULL;
   }
}

/* --------------- Fundamental functions --------------- */

static INLINE
void insert_vabits2_into_vabits8 ( Addr a, UChar vabits2, UChar* vabits8 )
{
   UInt shift =  (a & 3)  << 1;        // shift by 0, 2, 4, or 6
   *vabits8  &= ~(0x3     << shift);   // mask out the two old bits
   *vabits8  |=  (vabits2 << shift);   // mask  in the two new bits
}
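
// Example (illustrative): for an address with (a & 3) == 2 the shift is
// 4, so the two bits for that byte land in bits [5..4] of the vabits8
// chunk, matching the layout described above.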

static INLINE
void insert_vabits4_into_vabits8 ( Addr a, UChar vabits4, UChar* vabits8 )
{
   UInt shift;
   tl_assert(VG_IS_2_ALIGNED(a));      // Must be 2-aligned
   shift     =  (a & 2)   << 1;        // shift by 0 or 4
   *vabits8 &= ~(0xf      << shift);   // mask out the four old bits
   *vabits8 |=  (vabits4 << shift);    // mask  in the four new bits
}

static INLINE
UChar extract_vabits2_from_vabits8 ( Addr a, UChar vabits8 )
{
   UInt shift = (a & 3) << 1;          // shift by 0, 2, 4, or 6
   vabits8 >>= shift;                  // shift the two bits to the bottom
   return 0x3 & vabits8;               // mask out the rest
}

static INLINE
UChar extract_vabits4_from_vabits8 ( Addr a, UChar vabits8 )
{
   UInt shift;
   tl_assert(VG_IS_2_ALIGNED(a));      // Must be 2-aligned
   shift = (a & 2) << 1;               // shift by 0 or 4
   vabits8 >>= shift;                  // shift the four bits to the bottom
   return 0xf & vabits8;               // mask out the rest
}

// Note that these four are only used in slow cases.  The fast cases do
// clever things like combine the auxmap check (in
// get_secmap_{read,writ}able) with alignment checks.

// *** WARNING! ***
// Any time this function is called, if it is possible that vabits2
// is equal to VA_BITS2_PARTDEFINED, then the corresponding entry in the
// sec-V-bits table must also be set!
static INLINE
void set_vabits2 ( Addr a, UChar vabits2 )
{
   SecMap* sm       = get_secmap_for_writing(a);
   UWord   sm_off   = SM_OFF(a);
   insert_vabits2_into_vabits8( a, vabits2, &(sm->vabits8[sm_off]) );
}

static INLINE
UChar get_vabits2 ( Addr a )
{
   SecMap* sm       = get_secmap_for_reading(a);
   UWord   sm_off   = SM_OFF(a);
   UChar   vabits8  = sm->vabits8[sm_off];
   return extract_vabits2_from_vabits8(a, vabits8);
}

// *** WARNING! ***
// Any time this function is called, if it is possible that any of the
// 4 2-bit fields in vabits8 are equal to VA_BITS2_PARTDEFINED, then the
// corresponding entry(s) in the sec-V-bits table must also be set!
static INLINE
UChar get_vabits8_for_aligned_word32 ( Addr a )
{
   SecMap* sm       = get_secmap_for_reading(a);
   UWord   sm_off   = SM_OFF(a);
   UChar   vabits8  = sm->vabits8[sm_off];
   return vabits8;
}

static INLINE
void set_vabits8_for_aligned_word32 ( Addr a, UChar vabits8 )
{
   SecMap* sm       = get_secmap_for_writing(a);
   UWord   sm_off   = SM_OFF(a);
   sm->vabits8[sm_off] = vabits8;
}


// Forward declarations
static UWord get_sec_vbits8(Addr a);
static void  set_sec_vbits8(Addr a, UWord vbits8);

// Returns False if there was an addressability error.
static INLINE
Bool set_vbits8 ( Addr a, UChar vbits8 )
{
   Bool  ok      = True;
   UChar vabits2 = get_vabits2(a);
   if ( VA_BITS2_NOACCESS != vabits2 ) {
      // Addressable.  Convert in-register format to in-memory format.
      // Also remove any existing sec V bit entry for the byte if no
      // longer necessary.
      if      ( V_BITS8_DEFINED   == vbits8 ) { vabits2 = VA_BITS2_DEFINED;   }
      else if ( V_BITS8_UNDEFINED == vbits8 ) { vabits2 = VA_BITS2_UNDEFINED; }
      else                                    { vabits2 = VA_BITS2_PARTDEFINED;
                                                set_sec_vbits8(a, vbits8);  }
      set_vabits2(a, vabits2);

   } else {
      // Unaddressable!  Do nothing -- when writing to unaddressable
      // memory it acts as a black hole, and the V bits can never be seen
      // again.  So we don't have to write them at all.
      ok = False;
   }
   return ok;
}

// Returns False if there was an addressability error.  In that case, we put
// all defined bits into vbits8.
static INLINE
Bool get_vbits8 ( Addr a, UChar* vbits8 )
{
   Bool  ok      = True;
   UChar vabits2 = get_vabits2(a);

   // Convert the in-memory format to in-register format.
   if      ( VA_BITS2_DEFINED   == vabits2 ) { *vbits8 = V_BITS8_DEFINED;   }
   else if ( VA_BITS2_UNDEFINED == vabits2 ) { *vbits8 = V_BITS8_UNDEFINED; }
   else if ( VA_BITS2_NOACCESS  == vabits2 ) {
      *vbits8 = V_BITS8_DEFINED;    // Make V bits defined!
      ok = False;
   } else {
      tl_assert( VA_BITS2_PARTDEFINED == vabits2 );
      *vbits8 = get_sec_vbits8(a);
   }
   return ok;
}


/* --------------- Secondary V bit table ------------ */

// This table holds the full V bit pattern for partially-defined bytes
// (PDBs) that are represented by VA_BITS2_PARTDEFINED in the main shadow
// memory.
//
// Note: the nodes in this table can become stale.  Eg. if you write a PDB,
// then overwrite the same address with a fully defined byte, the sec-V-bit
// node will not necessarily be removed.  This is because checking for
// whether removal is necessary would slow down the fast paths.
//
// To avoid the stale nodes building up too much, we periodically (once the
// table reaches a certain size) garbage collect (GC) the table by
// traversing it and evicting any nodes that no longer represent a PDB.
// If more than a certain proportion of nodes survived, we increase the
// table size so that GCs occur less often.
//
// This policy is designed to avoid bad table bloat in the worst case where
// a program creates huge numbers of stale PDBs -- we would get this bloat
// if we had no GC -- while handling well the case where a node becomes
// stale but shortly afterwards is rewritten with a PDB and so becomes
// non-stale again (which happens quite often, eg. in perf/bz2).  If we just
// remove all stale nodes as soon as possible, we just end up re-adding a
// lot of them again later.  The "sufficiently stale" approach avoids
// this.  (If a program has many live PDBs, performance will just suck,
// there's no way around that.)
//
// Further comments, JRS 14 Feb 2012.  It turns out that the policy of
// holding on to stale entries for 2 GCs before discarding them can lead
// to massive space leaks.  So we're changing to an arrangement where
// lines are evicted as soon as they are observed to be stale during a
// GC.  This also has a side benefit of allowing the sufficiently_stale
// field to be removed from the SecVBitNode struct, reducing its size by
// 8 bytes, which is a substantial space saving considering that the
// struct was previously 32 or so bytes, on a 64 bit target.
//
// In order to try and mitigate the problem that the "sufficiently stale"
// heuristic was designed to avoid, the table size is allowed to drift
// up ("DRIFTUP") slowly to 80000, even if the residency is low.  This
// means that nodes will exist in the table longer on average, and hopefully
// will be deleted and re-added less frequently.
//
// The previous scaling up mechanism (now called STEPUP) is retained:
// if residency exceeds 50%, the table is scaled up, although by a
// factor sqrt(2) rather than 2 as before.  This effectively doubles the
// frequency of GCs when there are many PDBs and reduces the tendency of
// stale PDBs to reside for long periods in the table.

static OSet* secVBitTable;

// Stats
static ULong sec_vbits_new_nodes = 0;
static ULong sec_vbits_updates   = 0;

// This must be a power of two;  this is checked in mc_pre_clo_init().
// The size chosen here is a trade-off:  if the nodes are bigger (ie. cover
// a larger address range) they take more space but we can get multiple
// partially-defined bytes in one if they are close to each other, reducing
// the number of total nodes.  In practice sometimes they are clustered (eg.
// perf/bz2 repeatedly writes then reads more than 20,000 in a contiguous
// row), but often not.  So we choose something intermediate.
#define BYTES_PER_SEC_VBIT_NODE     16

// We make the table bigger by a factor of STEPUP_GROWTH_FACTOR if
// more than this many nodes survive a GC.
#define STEPUP_SURVIVOR_PROPORTION  0.5
#define STEPUP_GROWTH_FACTOR        1.414213562

// If the above heuristic doesn't apply, then we may make the table
// slightly bigger, by a factor of DRIFTUP_GROWTH_FACTOR, if more than
// this many nodes survive a GC, _and_ the total table size does
// not exceed a fixed limit.  The numbers are somewhat arbitrary, but
// work tolerably well on long Firefox runs.  The scaleup ratio of 1.5%
// effectively, although gradually, reduces residency and increases time
// between GCs for programs with small numbers of PDBs.  The 80000 limit
// effectively limits the table size to around 2MB for programs with
// small numbers of PDBs, whilst giving a reasonably long lifetime to
// entries, to try and reduce the costs resulting from deleting and
// re-adding of entries.
#define DRIFTUP_SURVIVOR_PROPORTION 0.15
#define DRIFTUP_GROWTH_FACTOR       1.015
#define DRIFTUP_MAX_SIZE            80000

// We GC the table when it gets this many nodes in it, ie. it's effectively
// the table size.  It can change.
static Int  secVBitLimit = 1000;
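// Illustrative numbers: with the initial limit of 1000, a GC in which
// more than 500 nodes survive (STEPUP) grows the limit to about 1414,
// so two STEPUPs roughly double it; a GC in which between 150 and 500
// nodes survive (DRIFTUP) grows it by only 1.5%, to 1015, and DRIFTUP
// stops once the limit reaches 80000.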

// The number of GCs done, used to age sec-V-bit nodes for eviction.
// Because it's unsigned, wrapping doesn't matter -- the right answer will
// come out anyway.
static UInt GCs_done = 0;

typedef
   struct {
      Addr  a;
      UChar vbits8[BYTES_PER_SEC_VBIT_NODE];
   }
   SecVBitNode;
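
// Each node therefore covers a 16-byte-aligned group of 16 bytes.  For
// example (illustrative): the V bits for address 0x5003 live in the node
// whose .a field is 0x5000, at vbits8[3].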

static OSet* createSecVBitTable(void)
{
   OSet* newSecVBitTable;
   newSecVBitTable = VG_(OSetGen_Create_With_Pool)
      ( offsetof(SecVBitNode, a),
        NULL, // use fast comparisons
        VG_(malloc), "mc.cSVT.1 (sec VBit table)",
        VG_(free),
        1000,
        sizeof(SecVBitNode));
   return newSecVBitTable;
}

static void gcSecVBitTable(void)
{
   OSet*        secVBitTable2;
   SecVBitNode* n;
   Int          i, n_nodes = 0, n_survivors = 0;

   GCs_done++;

   // Create the new table.
   secVBitTable2 = createSecVBitTable();

   // Traverse the table, moving fresh nodes into the new table.
   VG_(OSetGen_ResetIter)(secVBitTable);
   while ( (n = VG_(OSetGen_Next)(secVBitTable)) ) {
      // Keep node if any of its bytes are non-stale.  Using
      // get_vabits2() for the lookup is not very efficient, but I don't
      // think it matters.
      for (i = 0; i < BYTES_PER_SEC_VBIT_NODE; i++) {
         if (VA_BITS2_PARTDEFINED == get_vabits2(n->a + i)) {
            // Found a non-stale byte, so keep =>
            // Insert a copy of the node into the new table.
            SecVBitNode* n2 =
               VG_(OSetGen_AllocNode)(secVBitTable2, sizeof(SecVBitNode));
            *n2 = *n;
            VG_(OSetGen_Insert)(secVBitTable2, n2);
            break;
         }
      }
   }

   // Get the before and after sizes.
   n_nodes     = VG_(OSetGen_Size)(secVBitTable);
   n_survivors = VG_(OSetGen_Size)(secVBitTable2);

   // Destroy the old table, and put the new one in its place.
   VG_(OSetGen_Destroy)(secVBitTable);
   secVBitTable = secVBitTable2;

   if (VG_(clo_verbosity) > 1 && n_nodes != 0) {
      VG_(message)(Vg_DebugMsg, "memcheck GC: %d nodes, %d survivors (%.1f%%)\n",
                   n_nodes, n_survivors, n_survivors * 100.0 / n_nodes);
   }

   // Increase table size if necessary.
   if ((Double)n_survivors
       > ((Double)secVBitLimit * STEPUP_SURVIVOR_PROPORTION)) {
      secVBitLimit = (Int)((Double)secVBitLimit * (Double)STEPUP_GROWTH_FACTOR);
      if (VG_(clo_verbosity) > 1)
         VG_(message)(Vg_DebugMsg,
                      "memcheck GC: %d new table size (stepup)\n",
                      secVBitLimit);
   }
   else
   if (secVBitLimit < DRIFTUP_MAX_SIZE
       && (Double)n_survivors
          > ((Double)secVBitLimit * DRIFTUP_SURVIVOR_PROPORTION)) {
      secVBitLimit = (Int)((Double)secVBitLimit * (Double)DRIFTUP_GROWTH_FACTOR);
      if (VG_(clo_verbosity) > 1)
         VG_(message)(Vg_DebugMsg,
                      "memcheck GC: %d new table size (driftup)\n",
                      secVBitLimit);
   }
}

static UWord get_sec_vbits8(Addr a)
{
   Addr         aAligned = VG_ROUNDDN(a, BYTES_PER_SEC_VBIT_NODE);
   Int          amod     = a % BYTES_PER_SEC_VBIT_NODE;
   SecVBitNode* n        = VG_(OSetGen_Lookup)(secVBitTable, &aAligned);
   UChar        vbits8;
   tl_assert2(n, "get_sec_vbits8: no node for address %p (%p)\n", aAligned, a);
   // Shouldn't be fully defined or fully undefined -- those cases shouldn't
   // make it to the secondary V bits table.
   vbits8 = n->vbits8[amod];
   tl_assert(V_BITS8_DEFINED != vbits8 && V_BITS8_UNDEFINED != vbits8);
   return vbits8;
}

static void set_sec_vbits8(Addr a, UWord vbits8)
{
   Addr         aAligned = VG_ROUNDDN(a, BYTES_PER_SEC_VBIT_NODE);
   Int          i, amod  = a % BYTES_PER_SEC_VBIT_NODE;
   SecVBitNode* n        = VG_(OSetGen_Lookup)(secVBitTable, &aAligned);
   // Shouldn't be fully defined or fully undefined -- those cases shouldn't
   // make it to the secondary V bits table.
   tl_assert(V_BITS8_DEFINED != vbits8 && V_BITS8_UNDEFINED != vbits8);
   if (n) {
      n->vbits8[amod] = vbits8;     // update
      sec_vbits_updates++;
   } else {
      // Do a table GC if necessary.  Nb: do this before creating and
      // inserting the new node, to avoid erroneously GC'ing the new node.
      if (secVBitLimit == VG_(OSetGen_Size)(secVBitTable)) {
         gcSecVBitTable();
      }

      // New node:  assign the specific byte, make the rest invalid (they
      // should never be read as-is, but be cautious).
      n = VG_(OSetGen_AllocNode)(secVBitTable, sizeof(SecVBitNode));
      n->a            = aAligned;
      for (i = 0; i < BYTES_PER_SEC_VBIT_NODE; i++) {
         n->vbits8[i] = V_BITS8_UNDEFINED;
      }
      n->vbits8[amod] = vbits8;

      // Insert the new node.
      VG_(OSetGen_Insert)(secVBitTable, n);
      sec_vbits_new_nodes++;

      n_secVBit_nodes = VG_(OSetGen_Size)(secVBitTable);
      if (n_secVBit_nodes > max_secVBit_nodes)
         max_secVBit_nodes = n_secVBit_nodes;
   }
}

/* --------------- Endianness helpers --------------- */

/* Returns the offset in memory of the byteno-th most significant byte
   in a wordszB-sized word, given the specified endianness. */
static INLINE UWord byte_offset_w ( UWord wordszB, Bool bigendian,
                                    UWord byteno ) {
   return bigendian ? (wordszB-1-byteno) : byteno;
}


/* --------------- Ignored address ranges --------------- */

/* Denotes the address-error-reportability status for address ranges:
   IAR_NotIgnored:  the usual case -- report errors in this range
   IAR_CommandLine: don't report errors -- from command line setting
   IAR_ClientReq:   don't report errors -- from client request
*/
typedef
   enum { IAR_INVALID=99,
          IAR_NotIgnored,
          IAR_CommandLine,
          IAR_ClientReq }
   IARKind;

static const HChar* showIARKind ( IARKind iark )
{
   switch (iark) {
      case IAR_INVALID:     return "INVALID";
      case IAR_NotIgnored:  return "NotIgnored";
      case IAR_CommandLine: return "CommandLine";
      case IAR_ClientReq:   return "ClientReq";
      default:              return "???";
   }
}

// RangeMap<IARKind>
static RangeMap* gIgnoredAddressRanges = NULL;

static void init_gIgnoredAddressRanges ( void )
{
   if (LIKELY(gIgnoredAddressRanges != NULL))
      return;
   gIgnoredAddressRanges = VG_(newRangeMap)( VG_(malloc), "mc.igIAR.1",
                                             VG_(free), IAR_NotIgnored );
}

Bool MC_(in_ignored_range) ( Addr a )
{
   if (LIKELY(gIgnoredAddressRanges == NULL))
      return False;
   UWord how     = IAR_INVALID;
   UWord key_min = ~(UWord)0;
   UWord key_max =  (UWord)0;
   VG_(lookupRangeMap)(&key_min, &key_max, &how, gIgnoredAddressRanges, a);
   tl_assert(key_min <= a && a <= key_max);
   switch (how) {
      case IAR_NotIgnored:  return False;
      case IAR_CommandLine: return True;
      case IAR_ClientReq:   return True;
      default: break; /* invalid */
   }
   VG_(tool_panic)("MC_(in_ignored_range)");
   /*NOTREACHED*/
}

/* Parse two Addr separated by a dash, or fail. */

static Bool parse_range ( const HChar** ppc, Addr* result1, Addr* result2 )
{
   Bool ok = VG_(parse_Addr) (ppc, result1);
   if (!ok)
      return False;
   if (**ppc != '-')
      return False;
   (*ppc)++;
   ok = VG_(parse_Addr) (ppc, result2);
   if (!ok)
      return False;
   return True;
}

/* Parse a set of ranges separated by commas into 'ignoreRanges', or
   fail.  If they are valid, add them to the global set of ignored
   ranges. */
static Bool parse_ignore_ranges ( const HChar* str0 )
{
   init_gIgnoredAddressRanges();
   const HChar*  str = str0;
   const HChar** ppc = &str;
   while (1) {
      Addr start = ~(Addr)0;
      Addr end   = (Addr)0;
      Bool ok    = parse_range(ppc, &start, &end);
      if (!ok)
         return False;
      if (start > end)
         return False;
      VG_(bindRangeMap)( gIgnoredAddressRanges, start, end, IAR_CommandLine );
      if (**ppc == 0)
         return True;
      if (**ppc != ',')
         return False;
      (*ppc)++;
   }
   /*NOTREACHED*/
   return False;
}
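
/* For example, a string such as "0x11110000-0x1111ffff,0x22220000-0x2222ffff"
   (the format accepted by the command-line option that feeds this parser,
   --ignore-ranges=) adds two ranges, both marked IAR_CommandLine. */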

/* Add or remove [start, +len) from the set of ignored ranges. */
static Bool modify_ignore_ranges ( Bool addRange, Addr start, Addr len )
{
   init_gIgnoredAddressRanges();
   const Bool verbose = (VG_(clo_verbosity) > 1);
   if (len == 0) {
      return False;
   }
   if (addRange) {
      VG_(bindRangeMap)(gIgnoredAddressRanges,
                        start, start+len-1, IAR_ClientReq);
      if (verbose)
         VG_(dmsg)("memcheck: modify_ignore_ranges: add %p %p\n",
                   (void*)start, (void*)(start+len-1));
   } else {
      VG_(bindRangeMap)(gIgnoredAddressRanges,
                        start, start+len-1, IAR_NotIgnored);
      if (verbose)
         VG_(dmsg)("memcheck: modify_ignore_ranges: del %p %p\n",
                   (void*)start, (void*)(start+len-1));
   }
   if (verbose) {
      VG_(dmsg)("memcheck:   now have %ld ranges:\n",
                VG_(sizeRangeMap)(gIgnoredAddressRanges));
      Word i;
      for (i = 0; i < VG_(sizeRangeMap)(gIgnoredAddressRanges); i++) {
         UWord val     = IAR_INVALID;
         UWord key_min = ~(UWord)0;
         UWord key_max = (UWord)0;
         VG_(indexRangeMap)( &key_min, &key_max, &val,
                             gIgnoredAddressRanges, i );
         VG_(dmsg)("memcheck:      [%ld]  %016llx-%016llx  %s\n",
                   i, (ULong)key_min, (ULong)key_max, showIARKind(val));
      }
   }
   return True;
}


/* --------------- Load/store slow cases. --------------- */

static
__attribute__((noinline))
void mc_LOADV_128_or_256_slow ( /*OUT*/ULong* res,
                                Addr a, SizeT nBits, Bool bigendian )
{
   ULong  pessim[4];     /* only used when p-l-ok=yes */
   SSizeT szB            = nBits / 8;
   SSizeT szL            = szB / 8;  /* Size in Longs (64-bit units) */
   SSizeT i, j;          /* Must be signed. */
   SizeT  n_addrs_bad = 0;
   Addr   ai;
   UChar  vbits8;
   Bool   ok;

   /* Code below assumes load size is a power of two and at least 64
      bits. */
   tl_assert((szB & (szB-1)) == 0 && szL > 0);

   /* If this triggers, you probably just need to increase the size of
      the pessim array. */
   tl_assert(szL <= sizeof(pessim) / sizeof(pessim[0]));

   for (j = 0; j < szL; j++) {
      pessim[j] = V_BITS64_DEFINED;
      res[j] = V_BITS64_UNDEFINED;
   }

   /* Make up a result V word, which contains the loaded data for
      valid addresses and Defined for invalid addresses.  Iterate over
      the bytes in the word, from the most significant down to the
      least.  The vbits to return are calculated into vbits128.  Also
      compute the pessimising value to be used when
      --partial-loads-ok=yes.  n_addrs_bad is redundant (the relevant
      info can be gleaned from the pessim array) but is used as a
      cross-check. */
   for (j = szL-1; j >= 0; j--) {
      ULong vbits64    = V_BITS64_UNDEFINED;
      ULong pessim64   = V_BITS64_DEFINED;
      UWord long_index = byte_offset_w(szL, bigendian, j);
      for (i = 8-1; i >= 0; i--) {
         PROF_EVENT(29, "mc_LOADV_128_or_256_slow(loop)");
         ai = a + 8*long_index + byte_offset_w(8, bigendian, i);
         ok = get_vbits8(ai, &vbits8);
         vbits64 <<= 8;
         vbits64 |= vbits8;
         if (!ok) n_addrs_bad++;
         pessim64 <<= 8;
         pessim64 |= (ok ? V_BITS8_DEFINED : V_BITS8_UNDEFINED);
      }
      res[long_index] = vbits64;
      pessim[long_index] = pessim64;
   }

   /* In the common case, all the addresses involved are valid, so we
      just return the computed V bits and have done. */
   if (LIKELY(n_addrs_bad == 0))
      return;

   /* If there's no possibility of getting a partial-loads-ok
      exemption, report the error and quit. */
   if (!MC_(clo_partial_loads_ok)) {
      MC_(record_address_error)( VG_(get_running_tid)(), a, szB, False );
      return;
   }

   /* The partial-loads-ok exemption might apply.  Find out if it
      does.  If so, don't report an addressing error, but do return
      Undefined for the bytes that are out of range, so as to avoid
      false negatives.  If it doesn't apply, just report an addressing
      error in the usual way. */

   /* Some code steps along byte strings in aligned chunks
      even when there is only a partially defined word at the end (eg,
      optimised strlen).  This is allowed by the memory model of
      modern machines, since an aligned load cannot span two pages and
      thus cannot "partially fault".

      Therefore, a load from a partially-addressable place is allowed
      if all of the following hold:
      - the command-line flag is set [by default, it isn't]
      - it's an aligned load
      - at least one of the addresses in the word *is* valid

      Since this suppresses the addressing error, we avoid false
      negatives by marking bytes undefined when they come from an
      invalid address.
   */
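
   /* Illustrative example: a 16-byte aligned load that overlaps the end
      of a 13-byte heap block.  Bytes 0..12 are addressable and their V
      bits are returned as-is; bytes 13..15 are not, so with
      --partial-loads-ok=yes no address error is reported, but those
      bytes come back as Undefined via the pessim[] values below. */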

   /* "at least one of the addresses is invalid" */
   ok = False;
   for (j = 0; j < szL; j++)
      ok |= pessim[j] != V_BITS64_DEFINED;
   tl_assert(ok);

   if (0 == (a & (szB - 1)) && n_addrs_bad < szB) {
      /* Exemption applies.  Use the previously computed pessimising
         value and return the combined result, but don't flag an
         addressing error.  The pessimising value is Defined for valid
         addresses and Undefined for invalid addresses. */
      /* for assumption that doing bitwise or implements UifU */
      tl_assert(V_BIT_UNDEFINED == 1 && V_BIT_DEFINED == 0);
      /* (really need "UifU" here...)
         vbits[j] UifU= pessim[j]  (is pessimised by it, iow) */
      for (j = szL-1; j >= 0; j--)
         res[j] |= pessim[j];
      return;
   }

   /* Exemption doesn't apply.  Flag an addressing error in the normal
      way. */
   MC_(record_address_error)( VG_(get_running_tid)(), a, szB, False );
}


static
__attribute__((noinline))
ULong mc_LOADVn_slow ( Addr a, SizeT nBits, Bool bigendian )
{
   PROF_EVENT(30, "mc_LOADVn_slow");

   /* ------------ BEGIN semi-fast cases ------------ */
   /* These deal quickly-ish with the common auxiliary primary map
      cases on 64-bit platforms.  Are merely a speedup hack; can be
      omitted without loss of correctness/functionality.  Note that in
      both cases the "sizeof(void*) == 8" causes these cases to be
      folded out by compilers on 32-bit platforms.  These are derived
      from LOADV64 and LOADV32.
   */
   if (LIKELY(sizeof(void*) == 8
                      && nBits == 64 && VG_IS_8_ALIGNED(a))) {
      SecMap* sm       = get_secmap_for_reading(a);
      UWord   sm_off16 = SM_OFF_16(a);
      UWord   vabits16 = ((UShort*)(sm->vabits8))[sm_off16];
      if (LIKELY(vabits16 == VA_BITS16_DEFINED))
         return V_BITS64_DEFINED;
      if (LIKELY(vabits16 == VA_BITS16_UNDEFINED))
         return V_BITS64_UNDEFINED;
      /* else fall into the slow case */
   }
   if (LIKELY(sizeof(void*) == 8
                      && nBits == 32 && VG_IS_4_ALIGNED(a))) {
      SecMap* sm = get_secmap_for_reading(a);
      UWord sm_off = SM_OFF(a);
      UWord vabits8 = sm->vabits8[sm_off];
      if (LIKELY(vabits8 == VA_BITS8_DEFINED))
         return ((UWord)0xFFFFFFFF00000000ULL | (UWord)V_BITS32_DEFINED);
      if (LIKELY(vabits8 == VA_BITS8_UNDEFINED))
         return ((UWord)0xFFFFFFFF00000000ULL | (UWord)V_BITS32_UNDEFINED);
      /* else fall into slow case */
   }
   /* ------------ END semi-fast cases ------------ */

   ULong  vbits64     = V_BITS64_UNDEFINED; /* result */
   ULong  pessim64    = V_BITS64_DEFINED;   /* only used when p-l-ok=yes */
   SSizeT szB         = nBits / 8;
   SSizeT i;          /* Must be signed. */
   SizeT  n_addrs_bad = 0;
   Addr   ai;
   UChar  vbits8;
   Bool   ok;

   tl_assert(nBits == 64 || nBits == 32 || nBits == 16 || nBits == 8);

   /* Make up a 64-bit result V word, which contains the loaded data
      for valid addresses and Defined for invalid addresses.  Iterate
      over the bytes in the word, from the most significant down to
      the least.  The vbits to return are calculated into vbits64.
      Also compute the pessimising value to be used when
      --partial-loads-ok=yes.  n_addrs_bad is redundant (the relevant
      info can be gleaned from pessim64) but is used as a
      cross-check. */
   for (i = szB-1; i >= 0; i--) {
      PROF_EVENT(31, "mc_LOADVn_slow(loop)");
      ai = a + byte_offset_w(szB, bigendian, i);
      ok = get_vbits8(ai, &vbits8);
      vbits64 <<= 8;
      vbits64 |= vbits8;
      if (!ok) n_addrs_bad++;
      pessim64 <<= 8;
      pessim64 |= (ok ? V_BITS8_DEFINED : V_BITS8_UNDEFINED);
   }

   /* In the common case, all the addresses involved are valid, so we
      just return the computed V bits and have done. */
   if (LIKELY(n_addrs_bad == 0))
      return vbits64;

   /* If there's no possibility of getting a partial-loads-ok
      exemption, report the error and quit. */
   if (!MC_(clo_partial_loads_ok)) {
      MC_(record_address_error)( VG_(get_running_tid)(), a, szB, False );
      return vbits64;
   }

   /* The partial-loads-ok exemption might apply.  Find out if it
      does.  If so, don't report an addressing error, but do return
      Undefined for the bytes that are out of range, so as to avoid
      false negatives.  If it doesn't apply, just report an addressing
      error in the usual way. */

   /* Some code steps along byte strings in aligned word-sized chunks
      even when there is only a partially defined word at the end (eg,
      optimised strlen).  This is allowed by the memory model of
      modern machines, since an aligned load cannot span two pages and
      thus cannot "partially fault".  Despite such behaviour being
      declared undefined by ANSI C/C++.

1401      Therefore, a load from a partially-addressible place is allowed
1402      if all of the following hold:
1403      - the command-line flag is set [by default, it isn't]
1404      - it's a word-sized, word-aligned load
1405      - at least one of the addresses in the word *is* valid
1406
1407      Since this suppresses the addressing error, we avoid false
1408      negatives by marking bytes undefined when they come from an
1409      invalid address.
1410   */
1411
1412   /* "at least one of the addresses is invalid" */
1413   tl_assert(pessim64 != V_BITS64_DEFINED);
1414
1415   if (szB == VG_WORDSIZE && VG_IS_WORD_ALIGNED(a)
1416       && n_addrs_bad < VG_WORDSIZE) {
1417      /* Exemption applies.  Use the previously computed pessimising
1418         value for vbits64 and return the combined result, but don't
1419         flag an addressing error.  The pessimising value is Defined
1420         for valid addresses and Undefined for invalid addresses. */
1421      /* for assumption that doing bitwise or implements UifU */
1422      tl_assert(V_BIT_UNDEFINED == 1 && V_BIT_DEFINED == 0);
1423      /* (really need "UifU" here...)
1424         vbits64 UifU= pessim64  (is pessimised by it, iow) */
1425      vbits64 |= pessim64;
1426      return vbits64;
1427   }
1428
   /* Also, it appears that gcc generates string-stepping code in
      32-bit chunks on 64-bit platforms.  So, also grant an exemption
      for this case.  Note that the first clause of the conditional
      (VG_WORDSIZE == 8) is known at compile time, so the whole clause
      will get folded out in 32-bit builds. */
1434   if (VG_WORDSIZE == 8
1435       && VG_IS_4_ALIGNED(a) && nBits == 32 && n_addrs_bad < 4) {
1436      tl_assert(V_BIT_UNDEFINED == 1 && V_BIT_DEFINED == 0);
1437      /* (really need "UifU" here...)
1438         vbits64 UifU= pessim64  (is pessimised by it, iow) */
1439      vbits64 |= pessim64;
1440      /* Mark the upper 32 bits as undefined, just to be on the safe
1441         side. */
1442      vbits64 |= (((ULong)V_BITS32_UNDEFINED) << 32);
1443      return vbits64;
1444   }
1445
1446   /* Exemption doesn't apply.  Flag an addressing error in the normal
1447      way. */
1448   MC_(record_address_error)( VG_(get_running_tid)(), a, szB, False );
1449
1450   return vbits64;
1451}
1452
1453
1454static
1455__attribute__((noinline))
1456void mc_STOREVn_slow ( Addr a, SizeT nBits, ULong vbytes, Bool bigendian )
1457{
1458   SizeT szB = nBits / 8;
1459   SizeT i, n_addrs_bad = 0;
1460   UChar vbits8;
1461   Addr  ai;
1462   Bool  ok;
1463
1464   PROF_EVENT(35, "mc_STOREVn_slow");
1465
1466   /* ------------ BEGIN semi-fast cases ------------ */
1467   /* These deal quickly-ish with the common auxiliary primary map
      cases on 64-bit platforms.  They are merely a speedup hack; they can be
1469      omitted without loss of correctness/functionality.  Note that in
1470      both cases the "sizeof(void*) == 8" causes these cases to be
1471      folded out by compilers on 32-bit platforms.  The logic below
1472      is somewhat similar to some cases extensively commented in
1473      MC_(helperc_STOREV8).
1474   */
1475   if (LIKELY(sizeof(void*) == 8
1476                      && nBits == 64 && VG_IS_8_ALIGNED(a))) {
1477      SecMap* sm       = get_secmap_for_reading(a);
1478      UWord   sm_off16 = SM_OFF_16(a);
1479      UWord   vabits16 = ((UShort*)(sm->vabits8))[sm_off16];
1480      if (LIKELY( !is_distinguished_sm(sm) &&
1481                          (VA_BITS16_DEFINED   == vabits16 ||
1482                           VA_BITS16_UNDEFINED == vabits16) )) {
1483         /* Handle common case quickly: a is suitably aligned, */
         /* is mapped, and is addressable. */
1485         // Convert full V-bits in register to compact 2-bit form.
1486         if (LIKELY(V_BITS64_DEFINED == vbytes)) {
1487            ((UShort*)(sm->vabits8))[sm_off16] = (UShort)VA_BITS16_DEFINED;
1488            return;
1489         } else if (V_BITS64_UNDEFINED == vbytes) {
1490            ((UShort*)(sm->vabits8))[sm_off16] = (UShort)VA_BITS16_UNDEFINED;
1491            return;
1492         }
1493         /* else fall into the slow case */
1494      }
1495      /* else fall into the slow case */
1496   }
1497   if (LIKELY(sizeof(void*) == 8
1498                      && nBits == 32 && VG_IS_4_ALIGNED(a))) {
1499      SecMap* sm      = get_secmap_for_reading(a);
1500      UWord   sm_off  = SM_OFF(a);
1501      UWord   vabits8 = sm->vabits8[sm_off];
1502      if (LIKELY( !is_distinguished_sm(sm) &&
1503                          (VA_BITS8_DEFINED   == vabits8 ||
1504                           VA_BITS8_UNDEFINED == vabits8) )) {
1505         /* Handle common case quickly: a is suitably aligned, */
         /* is mapped, and is addressable. */
1507         // Convert full V-bits in register to compact 2-bit form.
1508         if (LIKELY(V_BITS32_DEFINED == (vbytes & 0xFFFFFFFF))) {
1509            sm->vabits8[sm_off] = VA_BITS8_DEFINED;
1510            return;
1511         } else if (V_BITS32_UNDEFINED == (vbytes & 0xFFFFFFFF)) {
1512            sm->vabits8[sm_off] = VA_BITS8_UNDEFINED;
1513            return;
1514         }
1515         /* else fall into the slow case */
1516      }
1517      /* else fall into the slow case */
1518   }
1519   /* ------------ END semi-fast cases ------------ */
1520
1521   tl_assert(nBits == 64 || nBits == 32 || nBits == 16 || nBits == 8);
1522
1523   /* Dump vbytes in memory, iterating from least to most significant
      byte.  At the same time establish addressability of the location. */
1525   for (i = 0; i < szB; i++) {
1526      PROF_EVENT(36, "mc_STOREVn_slow(loop)");
1527      ai     = a + byte_offset_w(szB, bigendian, i);
1528      vbits8 = vbytes & 0xff;
1529      ok     = set_vbits8(ai, vbits8);
1530      if (!ok) n_addrs_bad++;
1531      vbytes >>= 8;
1532   }
1533
1534   /* If an address error has happened, report it. */
1535   if (n_addrs_bad > 0)
1536      MC_(record_address_error)( VG_(get_running_tid)(), a, szB, True );
1537}
1538
1539
1540/*------------------------------------------------------------*/
1541/*--- Setting permissions over address ranges.             ---*/
1542/*------------------------------------------------------------*/
1543
1544static void set_address_range_perms ( Addr a, SizeT lenT, UWord vabits16,
1545                                      UWord dsm_num )
1546{
1547   UWord    sm_off, sm_off16;
1548   UWord    vabits2 = vabits16 & 0x3;
1549   SizeT    lenA, lenB, len_to_next_secmap;
1550   Addr     aNext;
1551   SecMap*  sm;
1552   SecMap** sm_ptr;
1553   SecMap*  example_dsm;
1554
1555   PROF_EVENT(150, "set_address_range_perms");
1556
1557   /* Check the V+A bits make sense. */
1558   tl_assert(VA_BITS16_NOACCESS  == vabits16 ||
1559             VA_BITS16_UNDEFINED == vabits16 ||
1560             VA_BITS16_DEFINED   == vabits16);
1561
1562   // This code should never write PDBs;  ensure this.  (See comment above
1563   // set_vabits2().)
1564   tl_assert(VA_BITS2_PARTDEFINED != vabits2);
1565
1566   if (lenT == 0)
1567      return;
1568
1569   if (lenT > 256 * 1024 * 1024) {
1570      if (VG_(clo_verbosity) > 0 && !VG_(clo_xml)) {
1571         const HChar* s = "unknown???";
1572         if (vabits16 == VA_BITS16_NOACCESS ) s = "noaccess";
1573         if (vabits16 == VA_BITS16_UNDEFINED) s = "undefined";
1574         if (vabits16 == VA_BITS16_DEFINED  ) s = "defined";
1575         VG_(message)(Vg_UserMsg, "Warning: set address range perms: "
1576                                  "large range [0x%lx, 0x%lx) (%s)\n",
1577                                  a, a + lenT, s);
1578      }
1579   }
1580
1581#ifndef PERF_FAST_SARP
1582   /*------------------ debug-only case ------------------ */
1583   {
1584      // Endianness doesn't matter here because all bytes are being set to
1585      // the same value.
1586      // Nb: We don't have to worry about updating the sec-V-bits table
1587      // after these set_vabits2() calls because this code never writes
1588      // VA_BITS2_PARTDEFINED values.
1589      SizeT i;
1590      for (i = 0; i < lenT; i++) {
1591         set_vabits2(a + i, vabits2);
1592      }
1593      return;
1594   }
1595#endif
1596
1597   /*------------------ standard handling ------------------ */
1598
1599   /* Get the distinguished secondary that we might want
1600      to use (part of the space-compression scheme). */
1601   example_dsm = &sm_distinguished[dsm_num];
1602
1603   // We have to handle ranges covering various combinations of partial and
1604   // whole sec-maps.  Here is how parts 1, 2 and 3 are used in each case.
1605   // Cases marked with a '*' are common.
1606   //
1607   //   TYPE                                             PARTS USED
1608   //   ----                                             ----------
1609   // * one partial sec-map                  (p)         1
1610   // - one whole sec-map                    (P)         2
1611   //
1612   // * two partial sec-maps                 (pp)        1,3
1613   // - one partial, one whole sec-map       (pP)        1,2
1614   // - one whole, one partial sec-map       (Pp)        2,3
1615   // - two whole sec-maps                   (PP)        2,2
1616   //
1617   // * one partial, one whole, one partial  (pPp)       1,2,3
1618   // - one partial, two whole               (pPP)       1,2,2
1619   // - two whole, one partial               (PPp)       2,2,3
1620   // - three whole                          (PPP)       2,2,2
1621   //
1622   // * one partial, N-2 whole, one partial  (pP...Pp)   1,2...2,3
1623   // - one partial, N-1 whole               (pP...PP)   1,2...2,2
1624   // - N-1 whole, one partial               (PP...Pp)   2,2...2,3
   // - N whole                              (PP...PP)   2,2...2,2
1626
1627   // Break up total length (lenT) into two parts:  length in the first
1628   // sec-map (lenA), and the rest (lenB);   lenT == lenA + lenB.
1629   aNext = start_of_this_sm(a) + SM_SIZE;
1630   len_to_next_secmap = aNext - a;
1631   if ( lenT <= len_to_next_secmap ) {
1632      // Range entirely within one sec-map.  Covers almost all cases.
1633      PROF_EVENT(151, "set_address_range_perms-single-secmap");
1634      lenA = lenT;
1635      lenB = 0;
1636   } else if (is_start_of_sm(a)) {
1637      // Range spans at least one whole sec-map, and starts at the beginning
1638      // of a sec-map; skip to Part 2.
1639      PROF_EVENT(152, "set_address_range_perms-startof-secmap");
1640      lenA = 0;
1641      lenB = lenT;
1642      goto part2;
1643   } else {
1644      // Range spans two or more sec-maps, first one is partial.
1645      PROF_EVENT(153, "set_address_range_perms-multiple-secmaps");
1646      lenA = len_to_next_secmap;
1647      lenB = lenT - lenA;
1648   }
1649
1650   //------------------------------------------------------------------------
1651   // Part 1: Deal with the first sec_map.  Most of the time the range will be
1652   // entirely within a sec_map and this part alone will suffice.  Also,
1653   // doing it this way lets us avoid repeatedly testing for the crossing of
1654   // a sec-map boundary within these loops.
1655   //------------------------------------------------------------------------
1656
1657   // If it's distinguished, make it undistinguished if necessary.
1658   sm_ptr = get_secmap_ptr(a);
1659   if (is_distinguished_sm(*sm_ptr)) {
1660      if (*sm_ptr == example_dsm) {
1661         // Sec-map already has the V+A bits that we want, so skip.
1662         PROF_EVENT(154, "set_address_range_perms-dist-sm1-quick");
1663         a    = aNext;
1664         lenA = 0;
1665      } else {
1666         PROF_EVENT(155, "set_address_range_perms-dist-sm1");
1667         *sm_ptr = copy_for_writing(*sm_ptr);
1668      }
1669   }
1670   sm = *sm_ptr;
1671
1672   // 1 byte steps
1673   while (True) {
1674      if (VG_IS_8_ALIGNED(a)) break;
1675      if (lenA < 1)           break;
1676      PROF_EVENT(156, "set_address_range_perms-loop1a");
1677      sm_off = SM_OFF(a);
1678      insert_vabits2_into_vabits8( a, vabits2, &(sm->vabits8[sm_off]) );
1679      a    += 1;
1680      lenA -= 1;
1681   }
1682   // 8-aligned, 8 byte steps
1683   while (True) {
1684      if (lenA < 8) break;
1685      PROF_EVENT(157, "set_address_range_perms-loop8a");
1686      sm_off16 = SM_OFF_16(a);
1687      ((UShort*)(sm->vabits8))[sm_off16] = vabits16;
1688      a    += 8;
1689      lenA -= 8;
1690   }
1691   // 1 byte steps
1692   while (True) {
1693      if (lenA < 1) break;
1694      PROF_EVENT(158, "set_address_range_perms-loop1b");
1695      sm_off = SM_OFF(a);
1696      insert_vabits2_into_vabits8( a, vabits2, &(sm->vabits8[sm_off]) );
1697      a    += 1;
1698      lenA -= 1;
1699   }
1700
1701   // We've finished the first sec-map.  Is that it?
1702   if (lenB == 0)
1703      return;
1704
1705   //------------------------------------------------------------------------
1706   // Part 2: Fast-set entire sec-maps at a time.
1707   //------------------------------------------------------------------------
1708  part2:
1709   // 64KB-aligned, 64KB steps.
1710   // Nb: we can reach here with lenB < SM_SIZE
1711   tl_assert(0 == lenA);
1712   while (True) {
1713      if (lenB < SM_SIZE) break;
1714      tl_assert(is_start_of_sm(a));
1715      PROF_EVENT(159, "set_address_range_perms-loop64K");
1716      sm_ptr = get_secmap_ptr(a);
1717      if (!is_distinguished_sm(*sm_ptr)) {
1718         PROF_EVENT(160, "set_address_range_perms-loop64K-free-dist-sm");
1719         // Free the non-distinguished sec-map that we're replacing.  This
1720         // case happens moderately often, enough to be worthwhile.
1721         SysRes sres = VG_(am_munmap_valgrind)((Addr)*sm_ptr, sizeof(SecMap));
1722         tl_assert2(! sr_isError(sres), "SecMap valgrind munmap failure\n");
1723      }
1724      update_SM_counts(*sm_ptr, example_dsm);
1725      // Make the sec-map entry point to the example DSM
1726      *sm_ptr = example_dsm;
1727      lenB -= SM_SIZE;
1728      a    += SM_SIZE;
1729   }
1730
1731   // We've finished the whole sec-maps.  Is that it?
1732   if (lenB == 0)
1733      return;
1734
1735   //------------------------------------------------------------------------
1736   // Part 3: Finish off the final partial sec-map, if necessary.
1737   //------------------------------------------------------------------------
1738
1739   tl_assert(is_start_of_sm(a) && lenB < SM_SIZE);
1740
1741   // If it's distinguished, make it undistinguished if necessary.
1742   sm_ptr = get_secmap_ptr(a);
1743   if (is_distinguished_sm(*sm_ptr)) {
1744      if (*sm_ptr == example_dsm) {
1745         // Sec-map already has the V+A bits that we want, so stop.
1746         PROF_EVENT(161, "set_address_range_perms-dist-sm2-quick");
1747         return;
1748      } else {
1749         PROF_EVENT(162, "set_address_range_perms-dist-sm2");
1750         *sm_ptr = copy_for_writing(*sm_ptr);
1751      }
1752   }
1753   sm = *sm_ptr;
1754
1755   // 8-aligned, 8 byte steps
1756   while (True) {
1757      if (lenB < 8) break;
1758      PROF_EVENT(163, "set_address_range_perms-loop8b");
1759      sm_off16 = SM_OFF_16(a);
1760      ((UShort*)(sm->vabits8))[sm_off16] = vabits16;
1761      a    += 8;
1762      lenB -= 8;
1763   }
1764   // 1 byte steps
1765   while (True) {
1766      if (lenB < 1) return;
1767      PROF_EVENT(164, "set_address_range_perms-loop1c");
1768      sm_off = SM_OFF(a);
1769      insert_vabits2_into_vabits8( a, vabits2, &(sm->vabits8[sm_off]) );
1770      a    += 1;
1771      lenB -= 1;
1772   }
1773}
1774
1775
1776/* --- Set permissions for arbitrary address ranges --- */
1777
1778void MC_(make_mem_noaccess) ( Addr a, SizeT len )
1779{
1780   PROF_EVENT(40, "MC_(make_mem_noaccess)");
1781   DEBUG("MC_(make_mem_noaccess)(%p, %lu)\n", a, len);
1782   set_address_range_perms ( a, len, VA_BITS16_NOACCESS, SM_DIST_NOACCESS );
1783   if (UNLIKELY( MC_(clo_mc_level) == 3 ))
1784      ocache_sarp_Clear_Origins ( a, len );
1785}
1786
1787static void make_mem_undefined ( Addr a, SizeT len )
1788{
1789   PROF_EVENT(41, "make_mem_undefined");
1790   DEBUG("make_mem_undefined(%p, %lu)\n", a, len);
1791   set_address_range_perms ( a, len, VA_BITS16_UNDEFINED, SM_DIST_UNDEFINED );
1792}
1793
1794void MC_(make_mem_undefined_w_otag) ( Addr a, SizeT len, UInt otag )
1795{
1796   PROF_EVENT(43, "MC_(make_mem_undefined)");
1797   DEBUG("MC_(make_mem_undefined)(%p, %lu)\n", a, len);
1798   set_address_range_perms ( a, len, VA_BITS16_UNDEFINED, SM_DIST_UNDEFINED );
1799   if (UNLIKELY( MC_(clo_mc_level) == 3 ))
1800      ocache_sarp_Set_Origins ( a, len, otag );
1801}
1802
1803static
1804void make_mem_undefined_w_tid_and_okind ( Addr a, SizeT len,
1805                                          ThreadId tid, UInt okind )
1806{
1807   UInt        ecu;
1808   ExeContext* here;
1809   /* VG_(record_ExeContext) checks for validity of tid, and asserts
1810      if it is invalid.  So no need to do it here. */
1811   tl_assert(okind <= 3);
1812   here = VG_(record_ExeContext)( tid, 0/*first_ip_delta*/ );
1813   tl_assert(here);
1814   ecu = VG_(get_ECU_from_ExeContext)(here);
1815   tl_assert(VG_(is_plausible_ECU)(ecu));
1816   MC_(make_mem_undefined_w_otag) ( a, len, ecu | okind );
1817}
1818
1819static
1820void mc_new_mem_w_tid_make_ECU  ( Addr a, SizeT len, ThreadId tid )
1821{
1822   make_mem_undefined_w_tid_and_okind ( a, len, tid, MC_OKIND_UNKNOWN );
1823}
1824
1825static
1826void mc_new_mem_w_tid_no_ECU  ( Addr a, SizeT len, ThreadId tid )
1827{
1828   MC_(make_mem_undefined_w_otag) ( a, len, MC_OKIND_UNKNOWN );
1829}
1830
1831void MC_(make_mem_defined) ( Addr a, SizeT len )
1832{
1833   PROF_EVENT(42, "MC_(make_mem_defined)");
1834   DEBUG("MC_(make_mem_defined)(%p, %lu)\n", a, len);
1835   set_address_range_perms ( a, len, VA_BITS16_DEFINED, SM_DIST_DEFINED );
1836   if (UNLIKELY( MC_(clo_mc_level) == 3 ))
1837      ocache_sarp_Clear_Origins ( a, len );
1838}
1839
1840/* For each byte in [a,a+len), if the byte is addressable, make it be
   defined, but if it isn't addressable, leave it alone.  In other
   words, a version of MC_(make_mem_defined) that doesn't mess with
   addressability.  Low-performance implementation. */
1844static void make_mem_defined_if_addressable ( Addr a, SizeT len )
1845{
1846   SizeT i;
1847   UChar vabits2;
1848   DEBUG("make_mem_defined_if_addressable(%p, %llu)\n", a, (ULong)len);
1849   for (i = 0; i < len; i++) {
1850      vabits2 = get_vabits2( a+i );
1851      if (LIKELY(VA_BITS2_NOACCESS != vabits2)) {
1852         set_vabits2(a+i, VA_BITS2_DEFINED);
1853         if (UNLIKELY(MC_(clo_mc_level) >= 3)) {
1854            MC_(helperc_b_store1)( a+i, 0 ); /* clear the origin tag */
1855         }
1856      }
1857   }
1858}
1859
1860/* Similarly (needed for mprotect handling ..) */
1861static void make_mem_defined_if_noaccess ( Addr a, SizeT len )
1862{
1863   SizeT i;
1864   UChar vabits2;
1865   DEBUG("make_mem_defined_if_noaccess(%p, %llu)\n", a, (ULong)len);
1866   for (i = 0; i < len; i++) {
1867      vabits2 = get_vabits2( a+i );
1868      if (LIKELY(VA_BITS2_NOACCESS == vabits2)) {
1869         set_vabits2(a+i, VA_BITS2_DEFINED);
1870         if (UNLIKELY(MC_(clo_mc_level) >= 3)) {
1871            MC_(helperc_b_store1)( a+i, 0 ); /* clear the origin tag */
1872         }
1873      }
1874   }
1875}
1876
1877/* --- Block-copy permissions (needed for implementing realloc() and
1878       sys_mremap). --- */
1879
1880void MC_(copy_address_range_state) ( Addr src, Addr dst, SizeT len )
1881{
1882   SizeT i, j;
1883   UChar vabits2, vabits8;
1884   Bool  aligned, nooverlap;
1885
1886   DEBUG("MC_(copy_address_range_state)\n");
1887   PROF_EVENT(50, "MC_(copy_address_range_state)");
1888
1889   if (len == 0 || src == dst)
1890      return;
1891
1892   aligned   = VG_IS_4_ALIGNED(src) && VG_IS_4_ALIGNED(dst);
1893   nooverlap = src+len <= dst || dst+len <= src;
1894
1895   if (nooverlap && aligned) {
1896
1897      /* Vectorised fast case, when no overlap and suitably aligned */
1898      /* vector loop */
1899      i = 0;
1900      while (len >= 4) {
1901         vabits8 = get_vabits8_for_aligned_word32( src+i );
1902         set_vabits8_for_aligned_word32( dst+i, vabits8 );
1903         if (LIKELY(VA_BITS8_DEFINED == vabits8
1904                            || VA_BITS8_UNDEFINED == vabits8
1905                            || VA_BITS8_NOACCESS == vabits8)) {
1906            /* do nothing */
1907         } else {
1908            /* have to copy secondary map info */
1909            if (VA_BITS2_PARTDEFINED == get_vabits2( src+i+0 ))
1910               set_sec_vbits8( dst+i+0, get_sec_vbits8( src+i+0 ) );
1911            if (VA_BITS2_PARTDEFINED == get_vabits2( src+i+1 ))
1912               set_sec_vbits8( dst+i+1, get_sec_vbits8( src+i+1 ) );
1913            if (VA_BITS2_PARTDEFINED == get_vabits2( src+i+2 ))
1914               set_sec_vbits8( dst+i+2, get_sec_vbits8( src+i+2 ) );
1915            if (VA_BITS2_PARTDEFINED == get_vabits2( src+i+3 ))
1916               set_sec_vbits8( dst+i+3, get_sec_vbits8( src+i+3 ) );
1917         }
1918         i += 4;
1919         len -= 4;
1920      }
1921      /* fixup loop */
1922      while (len >= 1) {
1923         vabits2 = get_vabits2( src+i );
1924         set_vabits2( dst+i, vabits2 );
1925         if (VA_BITS2_PARTDEFINED == vabits2) {
1926            set_sec_vbits8( dst+i, get_sec_vbits8( src+i ) );
1927         }
1928         i++;
1929         len--;
1930      }
1931
1932   } else {
1933
1934      /* We have to do things the slow way */
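      /* Editor's note: the direction choice below mirrors memmove():
         when src < dst the tail of src may coincide with the head of
         dst, so we copy from the highest byte downwards, reading each
         source byte before it can be overwritten; when src > dst we
         copy upwards for the same reason. */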
1935      if (src < dst) {
1936         for (i = 0, j = len-1; i < len; i++, j--) {
1937            PROF_EVENT(51, "MC_(copy_address_range_state)(loop)");
1938            vabits2 = get_vabits2( src+j );
1939            set_vabits2( dst+j, vabits2 );
1940            if (VA_BITS2_PARTDEFINED == vabits2) {
1941               set_sec_vbits8( dst+j, get_sec_vbits8( src+j ) );
1942            }
1943         }
1944      }
1945
1946      if (src > dst) {
1947         for (i = 0; i < len; i++) {
1948            PROF_EVENT(52, "MC_(copy_address_range_state)(loop)");
1949            vabits2 = get_vabits2( src+i );
1950            set_vabits2( dst+i, vabits2 );
1951            if (VA_BITS2_PARTDEFINED == vabits2) {
1952               set_sec_vbits8( dst+i, get_sec_vbits8( src+i ) );
1953            }
1954         }
1955      }
1956   }
1957
1958}
1959
1960
1961/*------------------------------------------------------------*/
1962/*--- Origin tracking stuff - cache basics                 ---*/
1963/*------------------------------------------------------------*/
1964
1965/* AN OVERVIEW OF THE ORIGIN TRACKING IMPLEMENTATION
1966   ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1967
1968   Note that this implementation draws inspiration from the "origin
1969   tracking by value piggybacking" scheme described in "Tracking Bad
1970   Apples: Reporting the Origin of Null and Undefined Value Errors"
1971   (Michael Bond, Nicholas Nethercote, Stephen Kent, Samuel Guyer,
1972   Kathryn McKinley, OOPSLA07, Montreal, Oct 2007) but in fact it is
1973   implemented completely differently.
1974
1975   Origin tags and ECUs -- about the shadow values
1976   ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1977
1978   This implementation tracks the defining point of all uninitialised
1979   values using so called "origin tags", which are 32-bit integers,
1980   rather than using the values themselves to encode the origins.  The
   latter, so-called "value piggybacking", is what the OOPSLA07 paper
1982   describes.
1983
1984   Origin tags, as tracked by the machinery below, are 32-bit unsigned
1985   ints (UInts), regardless of the machine's word size.  Each tag
1986   comprises an upper 30-bit ECU field and a lower 2-bit
1987   'kind' field.  The ECU field is a number given out by m_execontext
1988   and has a 1-1 mapping with ExeContext*s.  An ECU can be used
   directly as an origin tag (otag), but in fact we want to use the
   additional 'kind' field to indicate roughly where the
1991   tag came from.  This helps print more understandable error messages
1992   for the user -- it has no other purpose.  In summary:
1993
1994   * Both ECUs and origin tags are represented as 32-bit words
1995
1996   * m_execontext and the core-tool interface deal purely in ECUs.
1997     They have no knowledge of origin tags - that is a purely
1998     Memcheck-internal matter.
1999
2000   * all valid ECUs have the lowest 2 bits zero and at least
2001     one of the upper 30 bits nonzero (see VG_(is_plausible_ECU))
2002
2003   * to convert from an ECU to an otag, OR in one of the MC_OKIND_
2004     constants defined in mc_include.h.
2005
2006   * to convert an otag back to an ECU, AND it with ~3
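
   For example (editor's sketch; this mirrors the conversions actually
   performed further down in this file, e.g. in
   make_mem_undefined_w_tid_and_okind and the stack helpers):

      ecu  = VG_(get_ECU_from_ExeContext)(here);   -- lowest 2 bits zero
      otag = ecu | MC_OKIND_UNKNOWN;               -- ECU -> otag
      ecu2 = otag & ~3;                            -- otag -> ECU again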
2007
2008   One important fact is that no valid otag is zero.  A zero otag is
2009   used by the implementation to indicate "no origin", which could
2010   mean that either the value is defined, or it is undefined but the
2011   implementation somehow managed to lose the origin.
2012
2013   The ECU used for memory created by malloc etc is derived from the
2014   stack trace at the time the malloc etc happens.  This means the
2015   mechanism can show the exact allocation point for heap-created
2016   uninitialised values.
2017
2018   In contrast, it is simply too expensive to create a complete
2019   backtrace for each stack allocation.  Therefore we merely use a
2020   depth-1 backtrace for stack allocations, which can be done once at
2021   translation time, rather than N times at run time.  The result of
2022   this is that, for stack created uninitialised values, Memcheck can
2023   only show the allocating function, and not what called it.
2024   Furthermore, compilers tend to move the stack pointer just once at
2025   the start of the function, to allocate all locals, and so in fact
2026   the stack origin almost always simply points to the opening brace
2027   of the function.  Net result is, for stack origins, the mechanism
2028   can tell you in which function the undefined value was created, but
2029   that's all.  Users will need to carefully check all locals in the
2030   specified function.
2031
2032   Shadowing registers and memory
2033   ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
2034
2035   Memory is shadowed using a two level cache structure (ocacheL1 and
2036   ocacheL2).  Memory references are first directed to ocacheL1.  This
2037   is a traditional 2-way set associative cache with 32-byte lines and
2038   approximate LRU replacement within each set.
2039
2040   A naive implementation would require storing one 32 bit otag for
2041   each byte of memory covered, a 4:1 space overhead.  Instead, there
2042   is one otag for every 4 bytes of memory covered, plus a 4-bit mask
2043   that shows which of the 4 bytes have that shadow value and which
2044   have a shadow value of zero (indicating no origin).  Hence a lot of
2045   space is saved, but the cost is that only one different origin per
2046   4 bytes of address space can be represented.  This is a source of
2047   imprecision, but how much of a problem it really is remains to be
2048   seen.
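
   As a small illustration (editor's note; the exact byte-to-bit
   correspondence within the 4-bit mask is fixed by the helper
   functions later in this file): a cache-line entry with
   w32[i] == otag and descr[i] == 0xF says that all four bytes of that
   32-bit chunk carry 'otag', whereas descr[i] == 0 says that none of
   them carry any origin at all, whatever w32[i] holds.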
2049
2050   A cache line that contains all zeroes ("no origins") contains no
2051   useful information, and can be ejected from the L1 cache "for
2052   free", in the sense that a read miss on the L1 causes a line of
2053   zeroes to be installed.  However, ejecting a line containing
2054   nonzeroes risks losing origin information permanently.  In order to
2055   prevent such lossage, ejected nonzero lines are placed in a
2056   secondary cache (ocacheL2), which is an OSet (AVL tree) of cache
2057   lines.  This can grow arbitrarily large, and so should ensure that
2058   Memcheck runs out of memory in preference to losing useful origin
2059   info due to cache size limitations.
2060
2061   Shadowing registers is a bit tricky, because the shadow values are
2062   32 bits, regardless of the size of the register.  That gives a
2063   problem for registers smaller than 32 bits.  The solution is to
2064   find spaces in the guest state that are unused, and use those to
2065   shadow guest state fragments smaller than 32 bits.  For example, on
2066   ppc32/64, each vector register is 16 bytes long.  If 4 bytes of the
2067   shadow are allocated for the register's otag, then there are still
2068   12 bytes left over which could be used to shadow 3 other values.
2069
2070   This implies there is some non-obvious mapping from guest state
2071   (start,length) pairs to the relevant shadow offset (for the origin
2072   tags).  And it is unfortunately guest-architecture specific.  The
2073   mapping is contained in mc_machine.c, which is quite lengthy but
2074   straightforward.
2075
2076   Instrumenting the IR
2077   ~~~~~~~~~~~~~~~~~~~~
2078
2079   Instrumentation is largely straightforward, and done by the
2080   functions schemeE and schemeS in mc_translate.c.  These generate
2081   code for handling the origin tags of expressions (E) and statements
2082   (S) respectively.  The rather strange names are a reference to the
2083   "compilation schemes" shown in Simon Peyton Jones' book "The
2084   Implementation of Functional Programming Languages" (Prentice Hall,
2085   1987, see
2086   http://research.microsoft.com/~simonpj/papers/slpj-book-1987/index.htm).
2087
2088   schemeS merely arranges to move shadow values around the guest
2089   state to track the incoming IR.  schemeE is largely trivial too.
2090   The only significant point is how to compute the otag corresponding
2091   to binary (or ternary, quaternary, etc) operator applications.  The
2092   rule is simple: just take whichever value is larger (32-bit
2093   unsigned max).  Constants get the special value zero.  Hence this
2094   rule always propagates a nonzero (known) otag in preference to a
2095   zero (unknown, or more likely, value-is-defined) tag, as we want.
2096   If two different undefined values are inputs to a binary operator
2097   application, then which is propagated is arbitrary, but that
2098   doesn't matter, since the program is erroneous in using either of
2099   the values, and so there's no point in attempting to propagate
2100   both.
2101
2102   Since constants are abstracted to (otag) zero, much of the
2103   instrumentation code can be folded out without difficulty by the
2104   generic post-instrumentation IR cleanup pass, using these rules:
   Max32U(0,x) -> x, Max32U(x,0) -> x, and Max32U(x,y) where x and y
   are constants is evaluated at JIT time; the resulting dead code is
   then removed.  In practice this causes surprisingly few Max32Us to
   survive through to backend code generation.
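
   A tiny example of the rule (editor's illustration): for
   z = Add32(x,y), the instrumentation computes
      otag(z) = Max32U( otag(x), otag(y) )
   so if x is a constant (otag 0) and y is undefined with some nonzero
   otag t, Max32U(0,t) folds to t and y's origin is what propagates.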
2109
2110   Integration with the V-bits machinery
2111   ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
2112
2113   This is again largely straightforward.  Mostly the otag and V bits
2114   stuff are independent.  The only point of interaction is when the V
2115   bits instrumenter creates a call to a helper function to report an
2116   uninitialised value error -- in that case it must first use schemeE
2117   to get hold of the origin tag expression for the value, and pass
2118   that to the helper too.
2119
2120   There is the usual stuff to do with setting address range
2121   permissions.  When memory is painted undefined, we must also know
2122   the origin tag to paint with, which involves some tedious plumbing,
2123   particularly to do with the fast case stack handlers.  When memory
2124   is painted defined or noaccess then the origin tags must be forced
2125   to zero.
2126
2127   One of the goals of the implementation was to ensure that the
2128   non-origin tracking mode isn't slowed down at all.  To do this,
2129   various functions to do with memory permissions setting (again,
2130   mostly pertaining to the stack) are duplicated for the with- and
2131   without-otag case.
2132
2133   Dealing with stack redzones, and the NIA cache
2134   ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
2135
2136   This is one of the few non-obvious parts of the implementation.
2137
2138   Some ABIs (amd64-ELF, ppc64-ELF, ppc32/64-XCOFF) define a small
2139   reserved area below the stack pointer, that can be used as scratch
2140   space by compiler generated code for functions.  In the Memcheck
2141   sources this is referred to as the "stack redzone".  The important
2142   thing here is that such redzones are considered volatile across
2143   function calls and returns.  So Memcheck takes care to mark them as
2144   undefined for each call and return, on the afflicted platforms.
2145   Past experience shows this is essential in order to get reliable
2146   messages about uninitialised values that come from the stack.
2147
2148   So the question is, when we paint a redzone undefined, what origin
2149   tag should we use for it?  Consider a function f() calling g().  If
2150   we paint the redzone using an otag derived from the ExeContext of
2151   the CALL/BL instruction in f, then any errors in g causing it to
2152   use uninitialised values that happen to lie in the redzone, will be
2153   reported as having their origin in f.  Which is highly confusing.
2154
2155   The same applies for returns: if, on a return, we paint the redzone
   using an origin tag derived from the ExeContext of the RET/BLR
2157   instruction in g, then any later errors in f causing it to use
2158   uninitialised values in the redzone, will be reported as having
2159   their origin in g.  Which is just as confusing.
2160
2161   To do it right, in both cases we need to use an origin tag which
2162   pertains to the instruction which dynamically follows the CALL/BL
2163   or RET/BLR.  In short, one derived from the NIA - the "next
2164   instruction address".
2165
2166   To make this work, Memcheck's redzone-painting helper,
2167   MC_(helperc_MAKE_STACK_UNINIT), now takes a third argument, the
2168   NIA.  It converts the NIA to a 1-element ExeContext, and uses that
2169   ExeContext's ECU as the basis for the otag used to paint the
2170   redzone.  The expensive part of this is converting an NIA into an
2171   ECU, since this happens once for every call and every return.  So
2172   we use a simple 511-line, 2-way set associative cache
2173   (nia_to_ecu_cache) to cache the mappings, and that knocks most of
2174   the cost out.
2175
2176   Further background comments
2177   ~~~~~~~~~~~~~~~~~~~~~~~~~~~
2178
2179   > Question: why is otag a UInt?  Wouldn't a UWord be better?  Isn't
2180   > it really just the address of the relevant ExeContext?
2181
2182   Well, it's not the address, but a value which has a 1-1 mapping
2183   with ExeContexts, and is guaranteed not to be zero, since zero
2184   denotes (to memcheck) "unknown origin or defined value".  So these
2185   UInts are just numbers starting at 4 and incrementing by 4; each
2186   ExeContext is given a number when it is created.  (*** NOTE this
2187   confuses otags and ECUs; see comments above ***).
2188
2189   Making these otags 32-bit regardless of the machine's word size
2190   makes the 64-bit implementation easier (next para).  And it doesn't
2191   really limit us in any way, since for the tags to overflow would
2192   require that the program somehow caused 2^30-1 different
2193   ExeContexts to be created, in which case it is probably in deep
2194   trouble.  Not to mention V will have soaked up many tens of
2195   gigabytes of memory merely to store them all.
2196
2197   So having 64-bit origins doesn't really buy you anything, and has
2198   the following downsides:
2199
2200   Suppose that instead, an otag is a UWord.  This would mean that, on
2201   a 64-bit target,
2202
2203   1. It becomes hard to shadow any element of guest state which is
2204      smaller than 8 bytes.  To do so means you'd need to find some
2205      8-byte-sized hole in the guest state which you don't want to
2206      shadow, and use that instead to hold the otag.  On ppc64, the
2207      condition code register(s) are split into 20 UChar sized pieces,
2208      all of which need to be tracked (guest_XER_SO .. guest_CR7_0)
2209      and so that would entail finding 160 bytes somewhere else in the
2210      guest state.
2211
2212      Even on x86, I want to track origins for %AH .. %DH (bits 15:8
2213      of %EAX .. %EDX) that are separate from %AL .. %DL (bits 7:0 of
2214      same) and so I had to look for 4 untracked otag-sized areas in
2215      the guest state to make that possible.
2216
2217      The same problem exists of course when origin tags are only 32
2218      bits, but it's less extreme.
2219
2220   2. (More compelling) it doubles the size of the origin shadow
2221      memory.  Given that the shadow memory is organised as a fixed
2222      size cache, and that accuracy of tracking is limited by origins
2223      falling out the cache due to space conflicts, this isn't good.
2224
2225   > Another question: is the origin tracking perfect, or are there
2226   > cases where it fails to determine an origin?
2227
   It is imperfect for at least the following reasons, and
2229   probably more:
2230
2231   * Insufficient capacity in the origin cache.  When a line is
2232     evicted from the cache it is gone forever, and so subsequent
2233     queries for the line produce zero, indicating no origin
2234     information.  Interestingly, a line containing all zeroes can be
2235     evicted "free" from the cache, since it contains no useful
2236     information, so there is scope perhaps for some cleverer cache
2237     management schemes.  (*** NOTE, with the introduction of the
2238     second level origin tag cache, ocacheL2, this is no longer a
2239     problem. ***)
2240
2241   * The origin cache only stores one otag per 32-bits of address
2242     space, plus 4 bits indicating which of the 4 bytes has that tag
2243     and which are considered defined.  The result is that if two
2244     undefined bytes in the same word are stored in memory, the first
2245     stored byte's origin will be lost and replaced by the origin for
2246     the second byte.
2247
2248   * Nonzero origin tags for defined values.  Consider a binary
2249     operator application op(x,y).  Suppose y is undefined (and so has
2250     a valid nonzero origin tag), and x is defined, but erroneously
2251     has a nonzero origin tag (defined values should have tag zero).
2252     If the erroneous tag has a numeric value greater than y's tag,
2253     then the rule for propagating origin tags though binary
2254     operations, which is simply to take the unsigned max of the two
2255     tags, will erroneously propagate x's tag rather than y's.
2256
2257   * Some obscure uses of x86/amd64 byte registers can cause lossage
2258     or confusion of origins.  %AH .. %DH are treated as different
2259     from, and unrelated to, their parent registers, %EAX .. %EDX.
     So some weird sequences like
2261
2262        movb undefined-value, %AH
2263        movb defined-value, %AL
2264        .. use %AX or %EAX ..
2265
2266     will cause the origin attributed to %AH to be ignored, since %AL,
2267     %AX, %EAX are treated as the same register, and %AH as a
2268     completely separate one.
2269
2270   But having said all that, it actually seems to work fairly well in
2271   practice.
2272*/
2273
2274static UWord stats_ocacheL1_find           = 0;
2275static UWord stats_ocacheL1_found_at_1     = 0;
2276static UWord stats_ocacheL1_found_at_N     = 0;
2277static UWord stats_ocacheL1_misses         = 0;
2278static UWord stats_ocacheL1_lossage        = 0;
2279static UWord stats_ocacheL1_movefwds       = 0;
2280
2281static UWord stats__ocacheL2_refs          = 0;
2282static UWord stats__ocacheL2_misses        = 0;
2283static UWord stats__ocacheL2_n_nodes_max   = 0;
2284
2285/* Cache of 32-bit values, one every 32 bits of address space */
2286
2287#define OC_BITS_PER_LINE 5
2288#define OC_W32S_PER_LINE (1 << (OC_BITS_PER_LINE - 2))
2289
2290static INLINE UWord oc_line_offset ( Addr a ) {
2291   return (a >> 2) & (OC_W32S_PER_LINE - 1);
2292}
2293static INLINE Bool is_valid_oc_tag ( Addr tag ) {
2294   return 0 == (tag & ((1 << OC_BITS_PER_LINE) - 1));
2295}
2296
2297#define OC_LINES_PER_SET 2
2298
2299#define OC_N_SET_BITS    20
2300#define OC_N_SETS        (1 << OC_N_SET_BITS)
2301
2302/* These settings give:
2303   64 bit host: ocache:  100,663,296 sizeB    67,108,864 useful
2304   32 bit host: ocache:   92,274,688 sizeB    67,108,864 useful
2305*/
2306
2307#define OC_MOVE_FORWARDS_EVERY_BITS 7
2308
2309
2310typedef
2311   struct {
2312      Addr  tag;
2313      UInt  w32[OC_W32S_PER_LINE];
2314      UChar descr[OC_W32S_PER_LINE];
2315   }
2316   OCacheLine;
2317
2318/* Classify and also sanity-check 'line'.  Return 'e' (empty) if not
2319   in use, 'n' (nonzero) if it contains at least one valid origin tag,
2320   and 'z' if all the represented tags are zero. */
2321static UChar classify_OCacheLine ( OCacheLine* line )
2322{
2323   UWord i;
2324   if (line->tag == 1/*invalid*/)
2325      return 'e'; /* EMPTY */
2326   tl_assert(is_valid_oc_tag(line->tag));
2327   for (i = 0; i < OC_W32S_PER_LINE; i++) {
2328      tl_assert(0 == ((~0xF) & line->descr[i]));
2329      if (line->w32[i] > 0 && line->descr[i] > 0)
2330         return 'n'; /* NONZERO - contains useful info */
2331   }
2332   return 'z'; /* ZERO - no useful info */
2333}
2334
2335typedef
2336   struct {
2337      OCacheLine line[OC_LINES_PER_SET];
2338   }
2339   OCacheSet;
2340
2341typedef
2342   struct {
2343      OCacheSet set[OC_N_SETS];
2344   }
2345   OCache;
2346
2347static OCache* ocacheL1 = NULL;
2348static UWord   ocacheL1_event_ctr = 0;
2349
2350static void init_ocacheL2 ( void ); /* fwds */
2351static void init_OCache ( void )
2352{
2353   UWord line, set;
2354   tl_assert(MC_(clo_mc_level) >= 3);
2355   tl_assert(ocacheL1 == NULL);
2356   ocacheL1 = VG_(am_shadow_alloc)(sizeof(OCache));
2357   if (ocacheL1 == NULL) {
2358      VG_(out_of_memory_NORETURN)( "memcheck:allocating ocacheL1",
2359                                   sizeof(OCache) );
2360   }
2361   tl_assert(ocacheL1 != NULL);
2362   for (set = 0; set < OC_N_SETS; set++) {
2363      for (line = 0; line < OC_LINES_PER_SET; line++) {
2364         ocacheL1->set[set].line[line].tag = 1/*invalid*/;
2365      }
2366   }
2367   init_ocacheL2();
2368}
2369
2370static void moveLineForwards ( OCacheSet* set, UWord lineno )
2371{
2372   OCacheLine tmp;
2373   stats_ocacheL1_movefwds++;
2374   tl_assert(lineno > 0 && lineno < OC_LINES_PER_SET);
2375   tmp = set->line[lineno-1];
2376   set->line[lineno-1] = set->line[lineno];
2377   set->line[lineno] = tmp;
2378}
2379
2380static void zeroise_OCacheLine ( OCacheLine* line, Addr tag ) {
2381   UWord i;
2382   for (i = 0; i < OC_W32S_PER_LINE; i++) {
2383      line->w32[i] = 0; /* NO ORIGIN */
2384      line->descr[i] = 0; /* REALLY REALLY NO ORIGIN! */
2385   }
2386   line->tag = tag;
2387}
2388
2389//////////////////////////////////////////////////////////////
2390//// OCache backing store
2391
2392static OSet* ocacheL2 = NULL;
2393
2394static void* ocacheL2_malloc ( const HChar* cc, SizeT szB ) {
2395   return VG_(malloc)(cc, szB);
2396}
2397static void ocacheL2_free ( void* v ) {
2398   VG_(free)( v );
2399}
2400
2401/* Stats: # nodes currently in tree */
2402static UWord stats__ocacheL2_n_nodes = 0;
2403
2404static void init_ocacheL2 ( void )
2405{
2406   tl_assert(!ocacheL2);
2407   tl_assert(sizeof(Word) == sizeof(Addr)); /* since OCacheLine.tag :: Addr */
2408   tl_assert(0 == offsetof(OCacheLine,tag));
2409   ocacheL2
2410      = VG_(OSetGen_Create)( offsetof(OCacheLine,tag),
2411                             NULL, /* fast cmp */
2412                             ocacheL2_malloc, "mc.ioL2", ocacheL2_free);
2413   stats__ocacheL2_n_nodes = 0;
2414}
2415
2416/* Find line with the given tag in the tree, or NULL if not found. */
2417static OCacheLine* ocacheL2_find_tag ( Addr tag )
2418{
2419   OCacheLine* line;
2420   tl_assert(is_valid_oc_tag(tag));
2421   stats__ocacheL2_refs++;
2422   line = VG_(OSetGen_Lookup)( ocacheL2, &tag );
2423   return line;
2424}
2425
2426/* Delete the line with the given tag from the tree, if it is present, and
2427   free up the associated memory. */
2428static void ocacheL2_del_tag ( Addr tag )
2429{
2430   OCacheLine* line;
2431   tl_assert(is_valid_oc_tag(tag));
2432   stats__ocacheL2_refs++;
2433   line = VG_(OSetGen_Remove)( ocacheL2, &tag );
2434   if (line) {
2435      VG_(OSetGen_FreeNode)(ocacheL2, line);
2436      tl_assert(stats__ocacheL2_n_nodes > 0);
2437      stats__ocacheL2_n_nodes--;
2438   }
2439}
2440
2441/* Add a copy of the given line to the tree.  It must not already be
2442   present. */
2443static void ocacheL2_add_line ( OCacheLine* line )
2444{
2445   OCacheLine* copy;
2446   tl_assert(is_valid_oc_tag(line->tag));
2447   copy = VG_(OSetGen_AllocNode)( ocacheL2, sizeof(OCacheLine) );
2448   *copy = *line;
2449   stats__ocacheL2_refs++;
2450   VG_(OSetGen_Insert)( ocacheL2, copy );
2451   stats__ocacheL2_n_nodes++;
2452   if (stats__ocacheL2_n_nodes > stats__ocacheL2_n_nodes_max)
2453      stats__ocacheL2_n_nodes_max = stats__ocacheL2_n_nodes;
2454}
2455
2456////
2457//////////////////////////////////////////////////////////////
2458
2459__attribute__((noinline))
2460static OCacheLine* find_OCacheLine_SLOW ( Addr a )
2461{
2462   OCacheLine *victim, *inL2;
2463   UChar c;
2464   UWord line;
2465   UWord setno   = (a >> OC_BITS_PER_LINE) & (OC_N_SETS - 1);
2466   UWord tagmask = ~((1 << OC_BITS_PER_LINE) - 1);
2467   UWord tag     = a & tagmask;
2468   tl_assert(setno >= 0 && setno < OC_N_SETS);
2469
2470   /* we already tried line == 0; skip therefore. */
2471   for (line = 1; line < OC_LINES_PER_SET; line++) {
2472      if (ocacheL1->set[setno].line[line].tag == tag) {
2473         if (line == 1) {
2474            stats_ocacheL1_found_at_1++;
2475         } else {
2476            stats_ocacheL1_found_at_N++;
2477         }
2478         if (UNLIKELY(0 == (ocacheL1_event_ctr++
2479                            & ((1<<OC_MOVE_FORWARDS_EVERY_BITS)-1)))) {
2480            moveLineForwards( &ocacheL1->set[setno], line );
2481            line--;
2482         }
2483         return &ocacheL1->set[setno].line[line];
2484      }
2485   }
2486
2487   /* A miss.  Use the last slot.  Implicitly this means we're
2488      ejecting the line in the last slot. */
2489   stats_ocacheL1_misses++;
2490   tl_assert(line == OC_LINES_PER_SET);
2491   line--;
2492   tl_assert(line > 0);
2493
2494   /* First, move the to-be-ejected line to the L2 cache. */
2495   victim = &ocacheL1->set[setno].line[line];
2496   c = classify_OCacheLine(victim);
2497   switch (c) {
2498      case 'e':
2499         /* the line is empty (has invalid tag); ignore it. */
2500         break;
2501      case 'z':
2502         /* line contains zeroes.  We must ensure the backing store is
2503            updated accordingly, either by copying the line there
2504            verbatim, or by ensuring it isn't present there.  We
            choose the latter on the basis that it reduces the size of
2506            the backing store. */
2507         ocacheL2_del_tag( victim->tag );
2508         break;
2509      case 'n':
2510         /* line contains at least one real, useful origin.  Copy it
2511            to the backing store. */
2512         stats_ocacheL1_lossage++;
2513         inL2 = ocacheL2_find_tag( victim->tag );
2514         if (inL2) {
2515            *inL2 = *victim;
2516         } else {
2517            ocacheL2_add_line( victim );
2518         }
2519         break;
2520      default:
2521         tl_assert(0);
2522   }
2523
2524   /* Now we must reload the L1 cache from the backing tree, if
2525      possible. */
2526   tl_assert(tag != victim->tag); /* stay sane */
2527   inL2 = ocacheL2_find_tag( tag );
2528   if (inL2) {
2529      /* We're in luck.  It's in the L2. */
2530      ocacheL1->set[setno].line[line] = *inL2;
2531   } else {
2532      /* Missed at both levels of the cache hierarchy.  We have to
2533         declare it as full of zeroes (unknown origins). */
2534      stats__ocacheL2_misses++;
2535      zeroise_OCacheLine( &ocacheL1->set[setno].line[line], tag );
2536   }
2537
2538   /* Move it one forwards */
2539   moveLineForwards( &ocacheL1->set[setno], line );
2540   line--;
2541
2542   return &ocacheL1->set[setno].line[line];
2543}
2544
2545static INLINE OCacheLine* find_OCacheLine ( Addr a )
2546{
2547   UWord setno   = (a >> OC_BITS_PER_LINE) & (OC_N_SETS - 1);
2548   UWord tagmask = ~((1 << OC_BITS_PER_LINE) - 1);
2549   UWord tag     = a & tagmask;
2550
2551   stats_ocacheL1_find++;
2552
2553   if (OC_ENABLE_ASSERTIONS) {
2554      tl_assert(setno >= 0 && setno < OC_N_SETS);
2555      tl_assert(0 == (tag & (4 * OC_W32S_PER_LINE - 1)));
2556   }
2557
2558   if (LIKELY(ocacheL1->set[setno].line[0].tag == tag)) {
2559      return &ocacheL1->set[setno].line[0];
2560   }
2561
2562   return find_OCacheLine_SLOW( a );
2563}
2564
2565static INLINE void set_aligned_word64_Origin_to_undef ( Addr a, UInt otag )
2566{
2567   //// BEGIN inlined, specialised version of MC_(helperc_b_store8)
2568   //// Set the origins for a+0 .. a+7
2569   { OCacheLine* line;
2570     UWord lineoff = oc_line_offset(a);
2571     if (OC_ENABLE_ASSERTIONS) {
2572        tl_assert(lineoff >= 0
2573                  && lineoff < OC_W32S_PER_LINE -1/*'cos 8-aligned*/);
2574     }
2575     line = find_OCacheLine( a );
2576     line->descr[lineoff+0] = 0xF;
2577     line->descr[lineoff+1] = 0xF;
2578     line->w32[lineoff+0]   = otag;
2579     line->w32[lineoff+1]   = otag;
2580   }
2581   //// END inlined, specialised version of MC_(helperc_b_store8)
2582}
2583
2584
2585/*------------------------------------------------------------*/
2586/*--- Aligned fast case permission setters,                ---*/
2587/*--- for dealing with stacks                              ---*/
2588/*------------------------------------------------------------*/
2589
2590/*--------------------- 32-bit ---------------------*/
2591
2592/* Nb: by "aligned" here we mean 4-byte aligned */
2593
2594static INLINE void make_aligned_word32_undefined ( Addr a )
2595{
2596   PROF_EVENT(300, "make_aligned_word32_undefined");
2597
2598#ifndef PERF_FAST_STACK2
2599   make_mem_undefined(a, 4);
2600#else
2601   {
2602      UWord   sm_off;
2603      SecMap* sm;
2604
2605      if (UNLIKELY(a > MAX_PRIMARY_ADDRESS)) {
2606         PROF_EVENT(301, "make_aligned_word32_undefined-slow1");
2607         make_mem_undefined(a, 4);
2608         return;
2609      }
2610
2611      sm                  = get_secmap_for_writing_low(a);
2612      sm_off              = SM_OFF(a);
2613      sm->vabits8[sm_off] = VA_BITS8_UNDEFINED;
2614   }
2615#endif
2616}
2617
2618static INLINE
2619void make_aligned_word32_undefined_w_otag ( Addr a, UInt otag )
2620{
2621   make_aligned_word32_undefined(a);
2622   //// BEGIN inlined, specialised version of MC_(helperc_b_store4)
2623   //// Set the origins for a+0 .. a+3
2624   { OCacheLine* line;
2625     UWord lineoff = oc_line_offset(a);
2626     if (OC_ENABLE_ASSERTIONS) {
2627        tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE);
2628     }
2629     line = find_OCacheLine( a );
2630     line->descr[lineoff] = 0xF;
2631     line->w32[lineoff]   = otag;
2632   }
2633   //// END inlined, specialised version of MC_(helperc_b_store4)
2634}
2635
2636static INLINE
2637void make_aligned_word32_noaccess ( Addr a )
2638{
2639   PROF_EVENT(310, "make_aligned_word32_noaccess");
2640
2641#ifndef PERF_FAST_STACK2
2642   MC_(make_mem_noaccess)(a, 4);
2643#else
2644   {
2645      UWord   sm_off;
2646      SecMap* sm;
2647
2648      if (UNLIKELY(a > MAX_PRIMARY_ADDRESS)) {
2649         PROF_EVENT(311, "make_aligned_word32_noaccess-slow1");
2650         MC_(make_mem_noaccess)(a, 4);
2651         return;
2652      }
2653
2654      sm                  = get_secmap_for_writing_low(a);
2655      sm_off              = SM_OFF(a);
2656      sm->vabits8[sm_off] = VA_BITS8_NOACCESS;
2657
2658      //// BEGIN inlined, specialised version of MC_(helperc_b_store4)
2659      //// Set the origins for a+0 .. a+3.
2660      if (UNLIKELY( MC_(clo_mc_level) == 3 )) {
2661         OCacheLine* line;
2662         UWord lineoff = oc_line_offset(a);
2663         if (OC_ENABLE_ASSERTIONS) {
2664            tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE);
2665         }
2666         line = find_OCacheLine( a );
2667         line->descr[lineoff] = 0;
2668      }
2669      //// END inlined, specialised version of MC_(helperc_b_store4)
2670   }
2671#endif
2672}
2673
2674/*--------------------- 64-bit ---------------------*/
2675
2676/* Nb: by "aligned" here we mean 8-byte aligned */
2677
2678static INLINE void make_aligned_word64_undefined ( Addr a )
2679{
2680   PROF_EVENT(320, "make_aligned_word64_undefined");
2681
2682#ifndef PERF_FAST_STACK2
2683   make_mem_undefined(a, 8);
2684#else
2685   {
2686      UWord   sm_off16;
2687      SecMap* sm;
2688
2689      if (UNLIKELY(a > MAX_PRIMARY_ADDRESS)) {
2690         PROF_EVENT(321, "make_aligned_word64_undefined-slow1");
2691         make_mem_undefined(a, 8);
2692         return;
2693      }
2694
2695      sm       = get_secmap_for_writing_low(a);
2696      sm_off16 = SM_OFF_16(a);
2697      ((UShort*)(sm->vabits8))[sm_off16] = VA_BITS16_UNDEFINED;
2698   }
2699#endif
2700}
2701
2702static INLINE
2703void make_aligned_word64_undefined_w_otag ( Addr a, UInt otag )
2704{
2705   make_aligned_word64_undefined(a);
2706   //// BEGIN inlined, specialised version of MC_(helperc_b_store8)
2707   //// Set the origins for a+0 .. a+7
2708   { OCacheLine* line;
2709     UWord lineoff = oc_line_offset(a);
2710     tl_assert(lineoff >= 0
2711               && lineoff < OC_W32S_PER_LINE -1/*'cos 8-aligned*/);
2712     line = find_OCacheLine( a );
2713     line->descr[lineoff+0] = 0xF;
2714     line->descr[lineoff+1] = 0xF;
2715     line->w32[lineoff+0]   = otag;
2716     line->w32[lineoff+1]   = otag;
2717   }
2718   //// END inlined, specialised version of MC_(helperc_b_store8)
2719}
2720
2721static INLINE
2722void make_aligned_word64_noaccess ( Addr a )
2723{
2724   PROF_EVENT(330, "make_aligned_word64_noaccess");
2725
2726#ifndef PERF_FAST_STACK2
2727   MC_(make_mem_noaccess)(a, 8);
2728#else
2729   {
2730      UWord   sm_off16;
2731      SecMap* sm;
2732
2733      if (UNLIKELY(a > MAX_PRIMARY_ADDRESS)) {
2734         PROF_EVENT(331, "make_aligned_word64_noaccess-slow1");
2735         MC_(make_mem_noaccess)(a, 8);
2736         return;
2737      }
2738
2739      sm       = get_secmap_for_writing_low(a);
2740      sm_off16 = SM_OFF_16(a);
2741      ((UShort*)(sm->vabits8))[sm_off16] = VA_BITS16_NOACCESS;
2742
2743      //// BEGIN inlined, specialised version of MC_(helperc_b_store8)
2744      //// Clear the origins for a+0 .. a+7.
2745      if (UNLIKELY( MC_(clo_mc_level) == 3 )) {
2746         OCacheLine* line;
2747         UWord lineoff = oc_line_offset(a);
2748         tl_assert(lineoff >= 0
2749                   && lineoff < OC_W32S_PER_LINE -1/*'cos 8-aligned*/);
2750         line = find_OCacheLine( a );
2751         line->descr[lineoff+0] = 0;
2752         line->descr[lineoff+1] = 0;
2753      }
2754      //// END inlined, specialised version of MC_(helperc_b_store8)
2755   }
2756#endif
2757}
2758
2759
2760/*------------------------------------------------------------*/
2761/*--- Stack pointer adjustment                             ---*/
2762/*------------------------------------------------------------*/
2763
2764#ifdef PERF_FAST_STACK
2765#  define MAYBE_USED
2766#else
2767#  define MAYBE_USED __attribute__((unused))
2768#endif
2769
2770/*--------------- adjustment by 4 bytes ---------------*/
2771
2772MAYBE_USED
2773static void VG_REGPARM(2) mc_new_mem_stack_4_w_ECU(Addr new_SP, UInt ecu)
2774{
2775   UInt otag = ecu | MC_OKIND_STACK;
2776   PROF_EVENT(110, "new_mem_stack_4");
2777   if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2778      make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP, otag );
2779   } else {
2780      MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 4, otag );
2781   }
2782}
2783
2784MAYBE_USED
2785static void VG_REGPARM(1) mc_new_mem_stack_4(Addr new_SP)
2786{
2787   PROF_EVENT(110, "new_mem_stack_4");
2788   if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2789      make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
2790   } else {
2791      make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 4 );
2792   }
2793}
2794
2795MAYBE_USED
2796static void VG_REGPARM(1) mc_die_mem_stack_4(Addr new_SP)
2797{
2798   PROF_EVENT(120, "die_mem_stack_4");
2799   if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2800      make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-4 );
2801   } else {
2802      MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-4, 4 );
2803   }
2804}
2805
2806/*--------------- adjustment by 8 bytes ---------------*/
2807
2808MAYBE_USED
2809static void VG_REGPARM(2) mc_new_mem_stack_8_w_ECU(Addr new_SP, UInt ecu)
2810{
2811   UInt otag = ecu | MC_OKIND_STACK;
2812   PROF_EVENT(111, "new_mem_stack_8");
2813   if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2814      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP, otag );
2815   } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2816      make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP  , otag );
2817      make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+4, otag );
2818   } else {
2819      MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 8, otag );
2820   }
2821}
2822
2823MAYBE_USED
2824static void VG_REGPARM(1) mc_new_mem_stack_8(Addr new_SP)
2825{
2826   PROF_EVENT(111, "new_mem_stack_8");
2827   if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2828      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
2829   } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2830      make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
2831      make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP+4 );
2832   } else {
2833      make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 8 );
2834   }
2835}
2836
2837MAYBE_USED
2838static void VG_REGPARM(1) mc_die_mem_stack_8(Addr new_SP)
2839{
2840   PROF_EVENT(121, "die_mem_stack_8");
2841   if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2842      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-8 );
2843   } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2844      make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-8 );
2845      make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-4 );
2846   } else {
2847      MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-8, 8 );
2848   }
2849}
2850
2851/*--------------- adjustment by 12 bytes ---------------*/
2852
2853MAYBE_USED
2854static void VG_REGPARM(2) mc_new_mem_stack_12_w_ECU(Addr new_SP, UInt ecu)
2855{
2856   UInt otag = ecu | MC_OKIND_STACK;
2857   PROF_EVENT(112, "new_mem_stack_12");
2858   if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2859      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP  , otag );
2860      make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+8, otag );
2861   } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2862      /* from previous test we don't have 8-alignment at offset +0,
2863         hence must have 8 alignment at offsets +4/-4.  Hence safe to
2864         do 4 at +0 and then 8 at +4. */
2865      make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP  , otag );
2866      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+4, otag );
2867   } else {
2868      MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 12, otag );
2869   }
2870}
2871
2872MAYBE_USED
2873static void VG_REGPARM(1) mc_new_mem_stack_12(Addr new_SP)
2874{
2875   PROF_EVENT(112, "new_mem_stack_12");
2876   if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2877      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
2878      make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP+8 );
2879   } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2880      /* from previous test we don't have 8-alignment at offset +0,
2881         hence must have 8 alignment at offsets +4/-4.  Hence safe to
2882         do 4 at +0 and then 8 at +4. */
2883      make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
2884      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+4 );
2885   } else {
2886      make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 12 );
2887   }
2888}
2889
2890MAYBE_USED
2891static void VG_REGPARM(1) mc_die_mem_stack_12(Addr new_SP)
2892{
2893   PROF_EVENT(122, "die_mem_stack_12");
2894   /* Note the -12 in the test */
2895   if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP-12 )) {
2896      /* We have 8-alignment at -12, hence ok to do 8 at -12 and 4 at
2897         -4. */
2898      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-12 );
2899      make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-4  );
2900   } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2901      /* We have 4-alignment at +0, but we don't have 8-alignment at
2902         -12.  So we must have 8-alignment at -8.  Hence do 4 at -12
2903         and then 8 at -8. */
2904      make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-12 );
2905      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-8  );
2906   } else {
2907      MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-12, 12 );
2908   }
2909}
2910
2911/*--------------- adjustment by 16 bytes ---------------*/
2912
2913MAYBE_USED
2914static void VG_REGPARM(2) mc_new_mem_stack_16_w_ECU(Addr new_SP, UInt ecu)
2915{
2916   UInt otag = ecu | MC_OKIND_STACK;
2917   PROF_EVENT(113, "new_mem_stack_16");
2918   if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2919      /* Have 8-alignment at +0, hence do 8 at +0 and 8 at +8. */
2920      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP  , otag );
2921      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+8, otag );
2922   } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2923      /* Have 4 alignment at +0 but not 8; hence 8 must be at +4.
2924         Hence do 4 at +0, 8 at +4, 4 at +12. */
2925      make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP   , otag );
2926      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+4 , otag );
2927      make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+12, otag );
2928   } else {
2929      MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 16, otag );
2930   }
2931}
2932
2933MAYBE_USED
2934static void VG_REGPARM(1) mc_new_mem_stack_16(Addr new_SP)
2935{
2936   PROF_EVENT(113, "new_mem_stack_16");
2937   if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2938      /* Have 8-alignment at +0, hence do 8 at +0 and 8 at +8. */
2939      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
2940      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+8 );
2941   } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2942      /* Have 4 alignment at +0 but not 8; hence 8 must be at +4.
2943         Hence do 4 at +0, 8 at +4, 4 at +12. */
2944      make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
2945      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+4  );
2946      make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP+12 );
2947   } else {
2948      make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 16 );
2949   }
2950}
2951
2952MAYBE_USED
2953static void VG_REGPARM(1) mc_die_mem_stack_16(Addr new_SP)
2954{
2955   PROF_EVENT(123, "die_mem_stack_16");
2956   if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2957      /* Have 8-alignment at +0, hence do 8 at -16 and 8 at -8. */
2958      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-16 );
2959      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-8  );
2960   } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2961      /* 8 alignment must be at -12.  Do 4 at -16, 8 at -12, 4 at -4. */
2962      make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-16 );
2963      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-12 );
2964      make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-4  );
2965   } else {
2966      MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-16, 16 );
2967   }
2968}
2969
2970/*--------------- adjustment by 32 bytes ---------------*/
2971
2972MAYBE_USED
2973static void VG_REGPARM(2) mc_new_mem_stack_32_w_ECU(Addr new_SP, UInt ecu)
2974{
2975   UInt otag = ecu | MC_OKIND_STACK;
2976   PROF_EVENT(114, "new_mem_stack_32");
2977   if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2978      /* Straightforward */
2979      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP   , otag );
2980      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+8 , otag );
2981      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+16, otag );
2982      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+24, otag );
2983   } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2984      /* 8 alignment must be at +4.  Hence do 8 at +4,+12,+20 and 4 at
2985         +0,+28. */
2986      make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP   , otag );
2987      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+4 , otag );
2988      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+12, otag );
2989      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+20, otag );
2990      make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+28, otag );
2991   } else {
2992      MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 32, otag );
2993   }
2994}
2995
2996MAYBE_USED
2997static void VG_REGPARM(1) mc_new_mem_stack_32(Addr new_SP)
2998{
2999   PROF_EVENT(114, "new_mem_stack_32");
3000   if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3001      /* Straightforward */
3002      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
3003      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+8 );
3004      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+16 );
3005      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+24 );
3006   } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3007      /* 8 alignment must be at +4.  Hence do 8 at +4,+12,+20 and 4 at
3008         +0,+28. */
3009      make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
3010      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+4 );
3011      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+12 );
3012      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+20 );
3013      make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP+28 );
3014   } else {
3015      make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 32 );
3016   }
3017}
3018
3019MAYBE_USED
3020static void VG_REGPARM(1) mc_die_mem_stack_32(Addr new_SP)
3021{
3022   PROF_EVENT(124, "die_mem_stack_32");
3023   if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3024      /* Straightforward */
3025      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-32 );
3026      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-24 );
3027      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-16 );
3028      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP- 8 );
3029   } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3030      /* 8 alignment must be at -4 etc.  Hence do 8 at -12,-20,-28 and
3031         4 at -32,-4. */
3032      make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-32 );
3033      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-28 );
3034      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-20 );
3035      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-12 );
3036      make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-4  );
3037   } else {
3038      MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-32, 32 );
3039   }
3040}
3041
3042/*--------------- adjustment by 112 bytes ---------------*/
3043
3044MAYBE_USED
3045static void VG_REGPARM(2) mc_new_mem_stack_112_w_ECU(Addr new_SP, UInt ecu)
3046{
3047   UInt otag = ecu | MC_OKIND_STACK;
3048   PROF_EVENT(115, "new_mem_stack_112");
3049   if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3050      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP   , otag );
3051      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+8 , otag );
3052      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+16, otag );
3053      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+24, otag );
3054      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+32, otag );
3055      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+40, otag );
3056      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+48, otag );
3057      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+56, otag );
3058      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+64, otag );
3059      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+72, otag );
3060      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+80, otag );
3061      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+88, otag );
3062      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+96, otag );
3063      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+104, otag );
3064   } else {
3065      MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 112, otag );
3066   }
3067}
3068
3069MAYBE_USED
3070static void VG_REGPARM(1) mc_new_mem_stack_112(Addr new_SP)
3071{
3072   PROF_EVENT(115, "new_mem_stack_112");
3073   if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3074      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
3075      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+8 );
3076      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+16 );
3077      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+24 );
3078      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+32 );
3079      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+40 );
3080      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+48 );
3081      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+56 );
3082      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+64 );
3083      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+72 );
3084      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+80 );
3085      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+88 );
3086      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+96 );
3087      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+104 );
3088   } else {
3089      make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 112 );
3090   }
3091}
3092
3093MAYBE_USED
3094static void VG_REGPARM(1) mc_die_mem_stack_112(Addr new_SP)
3095{
3096   PROF_EVENT(125, "die_mem_stack_112");
3097   if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3098      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-112);
3099      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-104);
3100      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-96 );
3101      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-88 );
3102      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-80 );
3103      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-72 );
3104      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-64 );
3105      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-56 );
3106      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-48 );
3107      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-40 );
3108      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-32 );
3109      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-24 );
3110      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-16 );
3111      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP- 8 );
3112   } else {
3113      MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-112, 112 );
3114   }
3115}
3116
3117/*--------------- adjustment by 128 bytes ---------------*/
3118
3119MAYBE_USED
3120static void VG_REGPARM(2) mc_new_mem_stack_128_w_ECU(Addr new_SP, UInt ecu)
3121{
3122   UInt otag = ecu | MC_OKIND_STACK;
3123   PROF_EVENT(116, "new_mem_stack_128");
3124   if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3125      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP   , otag );
3126      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+8 , otag );
3127      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+16, otag );
3128      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+24, otag );
3129      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+32, otag );
3130      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+40, otag );
3131      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+48, otag );
3132      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+56, otag );
3133      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+64, otag );
3134      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+72, otag );
3135      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+80, otag );
3136      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+88, otag );
3137      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+96, otag );
3138      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+104, otag );
3139      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+112, otag );
3140      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+120, otag );
3141   } else {
3142      MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 128, otag );
3143   }
3144}
3145
3146MAYBE_USED
3147static void VG_REGPARM(1) mc_new_mem_stack_128(Addr new_SP)
3148{
3149   PROF_EVENT(116, "new_mem_stack_128");
3150   if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3151      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
3152      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+8 );
3153      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+16 );
3154      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+24 );
3155      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+32 );
3156      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+40 );
3157      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+48 );
3158      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+56 );
3159      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+64 );
3160      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+72 );
3161      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+80 );
3162      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+88 );
3163      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+96 );
3164      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+104 );
3165      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+112 );
3166      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+120 );
3167   } else {
3168      make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 128 );
3169   }
3170}
3171
3172MAYBE_USED
3173static void VG_REGPARM(1) mc_die_mem_stack_128(Addr new_SP)
3174{
3175   PROF_EVENT(126, "die_mem_stack_128");
3176   if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3177      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-128);
3178      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-120);
3179      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-112);
3180      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-104);
3181      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-96 );
3182      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-88 );
3183      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-80 );
3184      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-72 );
3185      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-64 );
3186      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-56 );
3187      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-48 );
3188      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-40 );
3189      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-32 );
3190      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-24 );
3191      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-16 );
3192      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP- 8 );
3193   } else {
3194      MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-128, 128 );
3195   }
3196}
3197
3198/*--------------- adjustment by 144 bytes ---------------*/
3199
3200MAYBE_USED
3201static void VG_REGPARM(2) mc_new_mem_stack_144_w_ECU(Addr new_SP, UInt ecu)
3202{
3203   UInt otag = ecu | MC_OKIND_STACK;
3204   PROF_EVENT(117, "new_mem_stack_144");
3205   if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3206      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP,     otag );
3207      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+8,   otag );
3208      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+16,  otag );
3209      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+24,  otag );
3210      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+32,  otag );
3211      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+40,  otag );
3212      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+48,  otag );
3213      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+56,  otag );
3214      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+64,  otag );
3215      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+72,  otag );
3216      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+80,  otag );
3217      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+88,  otag );
3218      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+96,  otag );
3219      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+104, otag );
3220      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+112, otag );
3221      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+120, otag );
3222      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+128, otag );
3223      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+136, otag );
3224   } else {
3225      MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 144, otag );
3226   }
3227}
3228
3229MAYBE_USED
3230static void VG_REGPARM(1) mc_new_mem_stack_144(Addr new_SP)
3231{
3232   PROF_EVENT(117, "new_mem_stack_144");
3233   if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3234      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
3235      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+8 );
3236      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+16 );
3237      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+24 );
3238      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+32 );
3239      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+40 );
3240      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+48 );
3241      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+56 );
3242      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+64 );
3243      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+72 );
3244      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+80 );
3245      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+88 );
3246      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+96 );
3247      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+104 );
3248      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+112 );
3249      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+120 );
3250      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+128 );
3251      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+136 );
3252   } else {
3253      make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 144 );
3254   }
3255}
3256
3257MAYBE_USED
3258static void VG_REGPARM(1) mc_die_mem_stack_144(Addr new_SP)
3259{
3260   PROF_EVENT(127, "die_mem_stack_144");
3261   if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3262      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-144);
3263      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-136);
3264      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-128);
3265      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-120);
3266      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-112);
3267      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-104);
3268      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-96 );
3269      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-88 );
3270      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-80 );
3271      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-72 );
3272      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-64 );
3273      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-56 );
3274      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-48 );
3275      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-40 );
3276      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-32 );
3277      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-24 );
3278      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-16 );
3279      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP- 8 );
3280   } else {
3281      MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-144, 144 );
3282   }
3283}
3284
3285/*--------------- adjustment by 160 bytes ---------------*/
3286
3287MAYBE_USED
3288static void VG_REGPARM(2) mc_new_mem_stack_160_w_ECU(Addr new_SP, UInt ecu)
3289{
3290   UInt otag = ecu | MC_OKIND_STACK;
3291   PROF_EVENT(118, "new_mem_stack_160");
3292   if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3293      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP,     otag );
3294      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+8,   otag );
3295      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+16,  otag );
3296      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+24,  otag );
3297      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+32,  otag );
3298      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+40,  otag );
3299      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+48,  otag );
3300      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+56,  otag );
3301      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+64,  otag );
3302      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+72,  otag );
3303      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+80,  otag );
3304      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+88,  otag );
3305      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+96,  otag );
3306      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+104, otag );
3307      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+112, otag );
3308      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+120, otag );
3309      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+128, otag );
3310      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+136, otag );
3311      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+144, otag );
3312      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+152, otag );
3313   } else {
3314      MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 160, otag );
3315   }
3316}
3317
3318MAYBE_USED
3319static void VG_REGPARM(1) mc_new_mem_stack_160(Addr new_SP)
3320{
3321   PROF_EVENT(118, "new_mem_stack_160");
3322   if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3323      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
3324      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+8 );
3325      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+16 );
3326      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+24 );
3327      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+32 );
3328      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+40 );
3329      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+48 );
3330      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+56 );
3331      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+64 );
3332      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+72 );
3333      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+80 );
3334      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+88 );
3335      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+96 );
3336      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+104 );
3337      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+112 );
3338      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+120 );
3339      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+128 );
3340      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+136 );
3341      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+144 );
3342      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+152 );
3343   } else {
3344      make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 160 );
3345   }
3346}
3347
3348MAYBE_USED
3349static void VG_REGPARM(1) mc_die_mem_stack_160(Addr new_SP)
3350{
3351   PROF_EVENT(128, "die_mem_stack_160");
3352   if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3353      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-160);
3354      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-152);
3355      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-144);
3356      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-136);
3357      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-128);
3358      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-120);
3359      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-112);
3360      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-104);
3361      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-96 );
3362      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-88 );
3363      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-80 );
3364      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-72 );
3365      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-64 );
3366      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-56 );
3367      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-48 );
3368      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-40 );
3369      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-32 );
3370      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-24 );
3371      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-16 );
3372      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP- 8 );
3373   } else {
3374      MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-160, 160 );
3375   }
3376}
3377
3378/*--------------- adjustment by N bytes ---------------*/
3379
3380static void mc_new_mem_stack_w_ECU ( Addr a, SizeT len, UInt ecu )
3381{
3382   UInt otag = ecu | MC_OKIND_STACK;
3383   PROF_EVENT(115, "new_mem_stack_w_otag");
3384   MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + a, len, otag );
3385}
3386
3387static void mc_new_mem_stack ( Addr a, SizeT len )
3388{
3389   PROF_EVENT(115, "new_mem_stack");
3390   make_mem_undefined ( -VG_STACK_REDZONE_SZB + a, len );
3391}
3392
3393static void mc_die_mem_stack ( Addr a, SizeT len )
3394{
3395   PROF_EVENT(125, "die_mem_stack");
3396   MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + a, len );
3397}
3398
3399
3400/* The AMD64 ABI says:
3401
3402   "The 128-byte area beyond the location pointed to by %rsp is considered
3403    to be reserved and shall not be modified by signal or interrupt
3404    handlers.  Therefore, functions may use this area for temporary data
3405    that is not needed across function calls.  In particular, leaf functions
3406    may use this area for their entire stack frame, rather than adjusting
3407    the stack pointer in the prologue and epilogue.  This area is known as
3408    red zone [sic]."
3409
3410   So after any call or return we need to mark this redzone as containing
3411   undefined values.
3412
3413   Consider this:  we're in function f.  f calls g.  g moves rsp down
3414   modestly (say 16 bytes) and writes stuff all over the red zone, making it
3415   defined.  g returns.  f is buggy and reads from parts of the red zone
3416   that it didn't write on.  But because g filled that area in, f is going
3417   to be picking up defined V bits and so any errors from reading bits of
3418   the red zone it didn't write will be missed.  The only solution I could
3419   think of was to make the red zone undefined when g returns to f.
3420
3421   This is in accordance with the ABI, which makes it clear the redzone
3422   is volatile across function calls.
3423
3424   The problem occurs the other way round too: f could fill the RZ up
3425   with defined values and g could mistakenly read them.  So the RZ
3426   also needs to be nuked on function calls.
3427*/
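
/* A hedged sketch of how that policy is carried out at the tool level
   (the actual call site is in the instrumenter, not in this file): on
   amd64, a call or return is accompanied by an AbiHint which ends up
   invoking something like

      MC_(helperc_MAKE_STACK_UNINIT)( rsp_after - 128, 128, nia );

   where rsp_after is shorthand for the guest %rsp after the adjustment
   (the helper is defined further below; the length is 288 rather than
   128 for ELF ppc64).  This re-marks the bytes below the stack pointer
   as undefined, so a later read by f of red-zone bytes that only g
   wrote is once again reportable. */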
3428
3429
3430/* Here's a simple cache to hold nia -> ECU mappings.  It could be
3431   improved so as to have a lower miss rate. */
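
/* Structure, as used by convert_nia_to_ecu() below: each of the
   N_NIA_TO_ECU_CACHE sets holds two (nia, ecu) pairs.  The most recently
   used pair sits in the 0 slot; a hit on the 1 slot swaps it to the
   front, and a miss pushes the 0 slot down and evicts the 1 slot.  Sets
   are selected by nia % N_NIA_TO_ECU_CACHE. */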
3432
3433static UWord stats__nia_cache_queries = 0;
3434static UWord stats__nia_cache_misses  = 0;
3435
3436typedef
3437   struct { UWord nia0; UWord ecu0;   /* nia0 maps to ecu0 */
3438            UWord nia1; UWord ecu1; } /* nia1 maps to ecu1 */
3439   WCacheEnt;
3440
3441#define N_NIA_TO_ECU_CACHE 511
3442
3443static WCacheEnt nia_to_ecu_cache[N_NIA_TO_ECU_CACHE];
3444
3445static void init_nia_to_ecu_cache ( void )
3446{
3447   UWord       i;
3448   Addr        zero_addr = 0;
3449   ExeContext* zero_ec;
3450   UInt        zero_ecu;
3451   /* Fill all the slots with an entry for address zero, and the
3452      relevant otags accordingly.  Hence the cache is initially filled
3453      with valid data. */
3454   zero_ec = VG_(make_depth_1_ExeContext_from_Addr)(zero_addr);
3455   tl_assert(zero_ec);
3456   zero_ecu = VG_(get_ECU_from_ExeContext)(zero_ec);
3457   tl_assert(VG_(is_plausible_ECU)(zero_ecu));
3458   for (i = 0; i < N_NIA_TO_ECU_CACHE; i++) {
3459      nia_to_ecu_cache[i].nia0 = zero_addr;
3460      nia_to_ecu_cache[i].ecu0 = zero_ecu;
3461      nia_to_ecu_cache[i].nia1 = zero_addr;
3462      nia_to_ecu_cache[i].ecu1 = zero_ecu;
3463   }
3464}
3465
3466static inline UInt convert_nia_to_ecu ( Addr nia )
3467{
3468   UWord       i;
3469   UInt        ecu;
3470   ExeContext* ec;
3471
3472   tl_assert( sizeof(nia_to_ecu_cache[0].nia1) == sizeof(nia) );
3473
3474   stats__nia_cache_queries++;
3475   i = nia % N_NIA_TO_ECU_CACHE;
3476   tl_assert(i >= 0 && i < N_NIA_TO_ECU_CACHE);
3477
3478   if (LIKELY( nia_to_ecu_cache[i].nia0 == nia ))
3479      return nia_to_ecu_cache[i].ecu0;
3480
3481   if (LIKELY( nia_to_ecu_cache[i].nia1 == nia )) {
3482#     define SWAP(_w1,_w2) { UWord _t = _w1; _w1 = _w2; _w2 = _t; }
3483      SWAP( nia_to_ecu_cache[i].nia0, nia_to_ecu_cache[i].nia1 );
3484      SWAP( nia_to_ecu_cache[i].ecu0, nia_to_ecu_cache[i].ecu1 );
3485#     undef SWAP
3486      return nia_to_ecu_cache[i].ecu0;
3487   }
3488
3489   stats__nia_cache_misses++;
3490   ec = VG_(make_depth_1_ExeContext_from_Addr)(nia);
3491   tl_assert(ec);
3492   ecu = VG_(get_ECU_from_ExeContext)(ec);
3493   tl_assert(VG_(is_plausible_ECU)(ecu));
3494
3495   nia_to_ecu_cache[i].nia1 = nia_to_ecu_cache[i].nia0;
3496   nia_to_ecu_cache[i].ecu1 = nia_to_ecu_cache[i].ecu0;
3497
3498   nia_to_ecu_cache[i].nia0 = nia;
3499   nia_to_ecu_cache[i].ecu0 = (UWord)ecu;
3500   return ecu;
3501}
3502
3503
3504/* Note that this serves both the origin-tracking and
3505   no-origin-tracking modes.  We assume that calls to it are
3506   sufficiently infrequent that it isn't worth specialising for the
3507   with/without origin-tracking cases. */
3508void MC_(helperc_MAKE_STACK_UNINIT) ( Addr base, UWord len, Addr nia )
3509{
3510   UInt otag;
3511   tl_assert(sizeof(UWord) == sizeof(SizeT));
3512   if (0)
3513      VG_(printf)("helperc_MAKE_STACK_UNINIT (%#lx,%lu,nia=%#lx)\n",
3514                  base, len, nia );
3515
3516   if (UNLIKELY( MC_(clo_mc_level) == 3 )) {
3517      UInt ecu = convert_nia_to_ecu ( nia );
3518      tl_assert(VG_(is_plausible_ECU)(ecu));
3519      otag = ecu | MC_OKIND_STACK;
3520   } else {
3521      tl_assert(nia == 0);
3522      otag = 0;
3523   }
3524
3525#  if 0
3526   /* Really slow version */
3527   MC_(make_mem_undefined_w_otag)(base, len, otag);
3528#  endif
3529
3530#  if 0
3531   /* Slow(ish) version, which is fairly easily seen to be correct.
3532   */
3533   if (LIKELY( VG_IS_8_ALIGNED(base) && len==128 )) {
3534      make_aligned_word64_undefined_w_otag(base +   0, otag);
3535      make_aligned_word64_undefined_w_otag(base +   8, otag);
3536      make_aligned_word64_undefined_w_otag(base +  16, otag);
3537      make_aligned_word64_undefined_w_otag(base +  24, otag);
3538
3539      make_aligned_word64_undefined_w_otag(base +  32, otag);
3540      make_aligned_word64_undefined_w_otag(base +  40, otag);
3541      make_aligned_word64_undefined_w_otag(base +  48, otag);
3542      make_aligned_word64_undefined_w_otag(base +  56, otag);
3543
3544      make_aligned_word64_undefined_w_otag(base +  64, otag);
3545      make_aligned_word64_undefined_w_otag(base +  72, otag);
3546      make_aligned_word64_undefined_w_otag(base +  80, otag);
3547      make_aligned_word64_undefined_w_otag(base +  88, otag);
3548
3549      make_aligned_word64_undefined_w_otag(base +  96, otag);
3550      make_aligned_word64_undefined_w_otag(base + 104, otag);
3551      make_aligned_word64_undefined_w_otag(base + 112, otag);
3552      make_aligned_word64_undefined_w_otag(base + 120, otag);
3553   } else {
3554      MC_(make_mem_undefined)(base, len, otag);
3555   }
3556#  endif
3557
3558   /* Idea is: go fast when
3559         * 8-aligned and length is 128
3560         * the sm is available in the main primary map
3561         * the address range falls entirely within a single secondary map
3562      If all those conditions hold, just update the V+A bits by writing
3563      directly into the vabits array.  (If the sm was distinguished, this
3564      will make a copy and then write to it.)
3565   */
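   /* Arithmetic check: each UShort of vabits covers 8 bytes of address
      space (2 V+A bits per byte), so the 16 stores below cover exactly
      128 bytes; the 288-byte ppc64 case further down uses 36 such
      stores. */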
3566
3567   if (LIKELY( len == 128 && VG_IS_8_ALIGNED(base) )) {
3568      /* Now we know the address range is suitably sized and aligned. */
3569      UWord a_lo = (UWord)(base);
3570      UWord a_hi = (UWord)(base + 128 - 1);
3571      tl_assert(a_lo < a_hi);             // paranoia: detect overflow
3572      if (a_hi <= MAX_PRIMARY_ADDRESS) {
3573         // Now we know the entire range is within the main primary map.
3574         SecMap* sm    = get_secmap_for_writing_low(a_lo);
3575         SecMap* sm_hi = get_secmap_for_writing_low(a_hi);
3576         /* Now we know that the entire address range falls within a
3577            single secondary map, and that that secondary 'lives' in
3578            the main primary map. */
3579         if (LIKELY(sm == sm_hi)) {
3580            // Finally, we know that the range is entirely within one secmap.
3581            UWord   v_off = SM_OFF(a_lo);
3582            UShort* p     = (UShort*)(&sm->vabits8[v_off]);
3583            p[ 0] = VA_BITS16_UNDEFINED;
3584            p[ 1] = VA_BITS16_UNDEFINED;
3585            p[ 2] = VA_BITS16_UNDEFINED;
3586            p[ 3] = VA_BITS16_UNDEFINED;
3587            p[ 4] = VA_BITS16_UNDEFINED;
3588            p[ 5] = VA_BITS16_UNDEFINED;
3589            p[ 6] = VA_BITS16_UNDEFINED;
3590            p[ 7] = VA_BITS16_UNDEFINED;
3591            p[ 8] = VA_BITS16_UNDEFINED;
3592            p[ 9] = VA_BITS16_UNDEFINED;
3593            p[10] = VA_BITS16_UNDEFINED;
3594            p[11] = VA_BITS16_UNDEFINED;
3595            p[12] = VA_BITS16_UNDEFINED;
3596            p[13] = VA_BITS16_UNDEFINED;
3597            p[14] = VA_BITS16_UNDEFINED;
3598            p[15] = VA_BITS16_UNDEFINED;
3599            if (UNLIKELY( MC_(clo_mc_level) == 3 )) {
3600               set_aligned_word64_Origin_to_undef( base + 8 * 0, otag );
3601               set_aligned_word64_Origin_to_undef( base + 8 * 1, otag );
3602               set_aligned_word64_Origin_to_undef( base + 8 * 2, otag );
3603               set_aligned_word64_Origin_to_undef( base + 8 * 3, otag );
3604               set_aligned_word64_Origin_to_undef( base + 8 * 4, otag );
3605               set_aligned_word64_Origin_to_undef( base + 8 * 5, otag );
3606               set_aligned_word64_Origin_to_undef( base + 8 * 6, otag );
3607               set_aligned_word64_Origin_to_undef( base + 8 * 7, otag );
3608               set_aligned_word64_Origin_to_undef( base + 8 * 8, otag );
3609               set_aligned_word64_Origin_to_undef( base + 8 * 9, otag );
3610               set_aligned_word64_Origin_to_undef( base + 8 * 10, otag );
3611               set_aligned_word64_Origin_to_undef( base + 8 * 11, otag );
3612               set_aligned_word64_Origin_to_undef( base + 8 * 12, otag );
3613               set_aligned_word64_Origin_to_undef( base + 8 * 13, otag );
3614               set_aligned_word64_Origin_to_undef( base + 8 * 14, otag );
3615               set_aligned_word64_Origin_to_undef( base + 8 * 15, otag );
3616            }
3617            return;
3618         }
3619      }
3620   }
3621
3622   /* 288 bytes (36 ULongs) is the magic value for ELF ppc64. */
3623   if (LIKELY( len == 288 && VG_IS_8_ALIGNED(base) )) {
3624      /* Now we know the address range is suitably sized and aligned. */
3625      UWord a_lo = (UWord)(base);
3626      UWord a_hi = (UWord)(base + 288 - 1);
3627      tl_assert(a_lo < a_hi);             // paranoia: detect overflow
3628      if (a_hi <= MAX_PRIMARY_ADDRESS) {
3629         // Now we know the entire range is within the main primary map.
3630         SecMap* sm    = get_secmap_for_writing_low(a_lo);
3631         SecMap* sm_hi = get_secmap_for_writing_low(a_hi);
3632         /* Now we know that the entire address range falls within a
3633            single secondary map, and that that secondary 'lives' in
3634            the main primary map. */
3635         if (LIKELY(sm == sm_hi)) {
3636            // Finally, we know that the range is entirely within one secmap.
3637            UWord   v_off = SM_OFF(a_lo);
3638            UShort* p     = (UShort*)(&sm->vabits8[v_off]);
3639            p[ 0] = VA_BITS16_UNDEFINED;
3640            p[ 1] = VA_BITS16_UNDEFINED;
3641            p[ 2] = VA_BITS16_UNDEFINED;
3642            p[ 3] = VA_BITS16_UNDEFINED;
3643            p[ 4] = VA_BITS16_UNDEFINED;
3644            p[ 5] = VA_BITS16_UNDEFINED;
3645            p[ 6] = VA_BITS16_UNDEFINED;
3646            p[ 7] = VA_BITS16_UNDEFINED;
3647            p[ 8] = VA_BITS16_UNDEFINED;
3648            p[ 9] = VA_BITS16_UNDEFINED;
3649            p[10] = VA_BITS16_UNDEFINED;
3650            p[11] = VA_BITS16_UNDEFINED;
3651            p[12] = VA_BITS16_UNDEFINED;
3652            p[13] = VA_BITS16_UNDEFINED;
3653            p[14] = VA_BITS16_UNDEFINED;
3654            p[15] = VA_BITS16_UNDEFINED;
3655            p[16] = VA_BITS16_UNDEFINED;
3656            p[17] = VA_BITS16_UNDEFINED;
3657            p[18] = VA_BITS16_UNDEFINED;
3658            p[19] = VA_BITS16_UNDEFINED;
3659            p[20] = VA_BITS16_UNDEFINED;
3660            p[21] = VA_BITS16_UNDEFINED;
3661            p[22] = VA_BITS16_UNDEFINED;
3662            p[23] = VA_BITS16_UNDEFINED;
3663            p[24] = VA_BITS16_UNDEFINED;
3664            p[25] = VA_BITS16_UNDEFINED;
3665            p[26] = VA_BITS16_UNDEFINED;
3666            p[27] = VA_BITS16_UNDEFINED;
3667            p[28] = VA_BITS16_UNDEFINED;
3668            p[29] = VA_BITS16_UNDEFINED;
3669            p[30] = VA_BITS16_UNDEFINED;
3670            p[31] = VA_BITS16_UNDEFINED;
3671            p[32] = VA_BITS16_UNDEFINED;
3672            p[33] = VA_BITS16_UNDEFINED;
3673            p[34] = VA_BITS16_UNDEFINED;
3674            p[35] = VA_BITS16_UNDEFINED;
3675            if (UNLIKELY( MC_(clo_mc_level) == 3 )) {
3676               set_aligned_word64_Origin_to_undef( base + 8 * 0, otag );
3677               set_aligned_word64_Origin_to_undef( base + 8 * 1, otag );
3678               set_aligned_word64_Origin_to_undef( base + 8 * 2, otag );
3679               set_aligned_word64_Origin_to_undef( base + 8 * 3, otag );
3680               set_aligned_word64_Origin_to_undef( base + 8 * 4, otag );
3681               set_aligned_word64_Origin_to_undef( base + 8 * 5, otag );
3682               set_aligned_word64_Origin_to_undef( base + 8 * 6, otag );
3683               set_aligned_word64_Origin_to_undef( base + 8 * 7, otag );
3684               set_aligned_word64_Origin_to_undef( base + 8 * 8, otag );
3685               set_aligned_word64_Origin_to_undef( base + 8 * 9, otag );
3686               set_aligned_word64_Origin_to_undef( base + 8 * 10, otag );
3687               set_aligned_word64_Origin_to_undef( base + 8 * 11, otag );
3688               set_aligned_word64_Origin_to_undef( base + 8 * 12, otag );
3689               set_aligned_word64_Origin_to_undef( base + 8 * 13, otag );
3690               set_aligned_word64_Origin_to_undef( base + 8 * 14, otag );
3691               set_aligned_word64_Origin_to_undef( base + 8 * 15, otag );
3692               set_aligned_word64_Origin_to_undef( base + 8 * 16, otag );
3693               set_aligned_word64_Origin_to_undef( base + 8 * 17, otag );
3694               set_aligned_word64_Origin_to_undef( base + 8 * 18, otag );
3695               set_aligned_word64_Origin_to_undef( base + 8 * 19, otag );
3696               set_aligned_word64_Origin_to_undef( base + 8 * 20, otag );
3697               set_aligned_word64_Origin_to_undef( base + 8 * 21, otag );
3698               set_aligned_word64_Origin_to_undef( base + 8 * 22, otag );
3699               set_aligned_word64_Origin_to_undef( base + 8 * 23, otag );
3700               set_aligned_word64_Origin_to_undef( base + 8 * 24, otag );
3701               set_aligned_word64_Origin_to_undef( base + 8 * 25, otag );
3702               set_aligned_word64_Origin_to_undef( base + 8 * 26, otag );
3703               set_aligned_word64_Origin_to_undef( base + 8 * 27, otag );
3704               set_aligned_word64_Origin_to_undef( base + 8 * 28, otag );
3705               set_aligned_word64_Origin_to_undef( base + 8 * 29, otag );
3706               set_aligned_word64_Origin_to_undef( base + 8 * 30, otag );
3707               set_aligned_word64_Origin_to_undef( base + 8 * 31, otag );
3708               set_aligned_word64_Origin_to_undef( base + 8 * 32, otag );
3709               set_aligned_word64_Origin_to_undef( base + 8 * 33, otag );
3710               set_aligned_word64_Origin_to_undef( base + 8 * 34, otag );
3711               set_aligned_word64_Origin_to_undef( base + 8 * 35, otag );
3712            }
3713            return;
3714         }
3715      }
3716   }
3717
3718   /* else fall into slow case */
3719   MC_(make_mem_undefined_w_otag)(base, len, otag);
3720}
3721
3722
3723/*------------------------------------------------------------*/
3724/*--- Checking memory                                      ---*/
3725/*------------------------------------------------------------*/
3726
3727typedef
3728   enum {
3729      MC_Ok = 5,
3730      MC_AddrErr = 6,
3731      MC_ValueErr = 7
3732   }
3733   MC_ReadResult;
3734
3735
3736/* Check permissions for address range.  If inadequate permissions
3737   exist, *bad_addr is set to the offending address, so the caller can
3738   know what it is. */
3739
3740/* Returns True if [a .. a+len) is not addressable.  Otherwise,
3741   returns False, and if bad_addr is non-NULL, sets *bad_addr to
3742   indicate the lowest failing address.  Functions below are
3743   similar. */
3744Bool MC_(check_mem_is_noaccess) ( Addr a, SizeT len, Addr* bad_addr )
3745{
3746   SizeT i;
3747   UWord vabits2;
3748
3749   PROF_EVENT(60, "check_mem_is_noaccess");
3750   for (i = 0; i < len; i++) {
3751      PROF_EVENT(61, "check_mem_is_noaccess(loop)");
3752      vabits2 = get_vabits2(a);
3753      if (VA_BITS2_NOACCESS != vabits2) {
3754         if (bad_addr != NULL) *bad_addr = a;
3755         return False;
3756      }
3757      a++;
3758   }
3759   return True;
3760}
3761
3762static Bool is_mem_addressable ( Addr a, SizeT len,
3763                                 /*OUT*/Addr* bad_addr )
3764{
3765   SizeT i;
3766   UWord vabits2;
3767
3768   PROF_EVENT(62, "is_mem_addressable");
3769   for (i = 0; i < len; i++) {
3770      PROF_EVENT(63, "is_mem_addressable(loop)");
3771      vabits2 = get_vabits2(a);
3772      if (VA_BITS2_NOACCESS == vabits2) {
3773         if (bad_addr != NULL) *bad_addr = a;
3774         return False;
3775      }
3776      a++;
3777   }
3778   return True;
3779}
3780
3781static MC_ReadResult is_mem_defined ( Addr a, SizeT len,
3782                                      /*OUT*/Addr* bad_addr,
3783                                      /*OUT*/UInt* otag )
3784{
3785   SizeT i;
3786   UWord vabits2;
3787
3788   PROF_EVENT(64, "is_mem_defined");
3789   DEBUG("is_mem_defined\n");
3790
3791   if (otag)     *otag = 0;
3792   if (bad_addr) *bad_addr = 0;
3793   for (i = 0; i < len; i++) {
3794      PROF_EVENT(65, "is_mem_defined(loop)");
3795      vabits2 = get_vabits2(a);
3796      if (VA_BITS2_DEFINED != vabits2) {
3797         // Error!  Nb: Report addressability errors in preference to
3798         // definedness errors.  And don't report definedness errors unless
3799         // --undef-value-errors=yes.
3800         if (bad_addr) {
3801            *bad_addr = a;
3802         }
3803         if (VA_BITS2_NOACCESS == vabits2) {
3804            return MC_AddrErr;
3805         }
3806         if (MC_(clo_mc_level) >= 2) {
3807            if (otag && MC_(clo_mc_level) == 3) {
3808               *otag = MC_(helperc_b_load1)( a );
3809            }
3810            return MC_ValueErr;
3811         }
3812      }
3813      a++;
3814   }
3815   return MC_Ok;
3816}
3817
3818
3819/* Like is_mem_defined but doesn't give up at the first uninitialised
3820   byte -- the entire range is always checked.  This is important for
3821   detecting errors in the case where a checked range strays into
3822   invalid memory, but that fact is not detected by the ordinary
3823   is_mem_defined(), because an undefined section precedes the
3824   out-of-range section, possibly as a result of an alignment hole in
3825   the checked data.  This version always checks the entire range and
3826   can report both a definedness and an addressability error, if
3827   necessary. */
3828static void is_mem_defined_comprehensive (
3829               Addr a, SizeT len,
3830               /*OUT*/Bool* errorV,    /* is there a definedness err? */
3831               /*OUT*/Addr* bad_addrV, /* if so where? */
3832               /*OUT*/UInt* otagV,     /* and what's its otag? */
3833               /*OUT*/Bool* errorA,    /* is there an addressability err? */
3834               /*OUT*/Addr* bad_addrA  /* if so where? */
3835            )
3836{
3837   SizeT i;
3838   UWord vabits2;
3839   Bool  already_saw_errV = False;
3840
3841   PROF_EVENT(64, "is_mem_defined"); // fixme
3842   DEBUG("is_mem_defined_comprehensive\n");
3843
3844   tl_assert(!(*errorV || *errorA));
3845
3846   for (i = 0; i < len; i++) {
3847      PROF_EVENT(65, "is_mem_defined(loop)"); // fixme
3848      vabits2 = get_vabits2(a);
3849      switch (vabits2) {
3850         case VA_BITS2_DEFINED:
3851            a++;
3852            break;
3853         case VA_BITS2_UNDEFINED:
3854         case VA_BITS2_PARTDEFINED:
3855            if (!already_saw_errV) {
3856               *errorV    = True;
3857               *bad_addrV = a;
3858               if (MC_(clo_mc_level) == 3) {
3859                  *otagV = MC_(helperc_b_load1)( a );
3860               } else {
3861                  *otagV = 0;
3862               }
3863               already_saw_errV = True;
3864            }
3865            a++; /* keep going */
3866            break;
3867         case VA_BITS2_NOACCESS:
3868            *errorA    = True;
3869            *bad_addrA = a;
3870            return; /* give up now. */
3871         default:
3872            tl_assert(0);
3873      }
3874   }
3875}
3876
3877
3878/* Check a zero-terminated ascii string.  Tricky -- don't want to
3879   examine the actual bytes, to find the end, until we're sure it is
3880   safe to do so. */
3881
3882static MC_ReadResult mc_is_defined_asciiz ( Addr a, Addr* bad_addr, UInt* otag )
3883{
3884   UWord vabits2;
3885
3886   PROF_EVENT(66, "mc_is_defined_asciiz");
3887   DEBUG("mc_is_defined_asciiz\n");
3888
3889   if (otag)     *otag = 0;
3890   if (bad_addr) *bad_addr = 0;
3891   while (True) {
3892      PROF_EVENT(67, "mc_is_defined_asciiz(loop)");
3893      vabits2 = get_vabits2(a);
3894      if (VA_BITS2_DEFINED != vabits2) {
3895         // Error!  Nb: Report addressability errors in preference to
3896         // definedness errors.  And don't report definedness errors unless
3897         // --undef-value-errors=yes.
3898         if (bad_addr) {
3899            *bad_addr = a;
3900         }
3901         if (VA_BITS2_NOACCESS == vabits2) {
3902            return MC_AddrErr;
3903         }
3904         if (MC_(clo_mc_level) >= 2) {
3905            if (otag && MC_(clo_mc_level) == 3) {
3906               *otag = MC_(helperc_b_load1)( a );
3907            }
3908            return MC_ValueErr;
3909         }
3910      }
3911      /* Ok, a is safe to read. */
3912      if (* ((UChar*)a) == 0) {
3913         return MC_Ok;
3914      }
3915      a++;
3916   }
3917}
3918
3919
3920/*------------------------------------------------------------*/
3921/*--- Memory event handlers                                ---*/
3922/*------------------------------------------------------------*/
3923
3924static
3925void check_mem_is_addressable ( CorePart part, ThreadId tid, const HChar* s,
3926                                Addr base, SizeT size )
3927{
3928   Addr bad_addr;
3929   Bool ok = is_mem_addressable ( base, size, &bad_addr );
3930
3931   if (!ok) {
3932      switch (part) {
3933      case Vg_CoreSysCall:
3934         MC_(record_memparam_error) ( tid, bad_addr,
3935                                      /*isAddrErr*/True, s, 0/*otag*/ );
3936         break;
3937
3938      case Vg_CoreSignal:
3939         MC_(record_core_mem_error)( tid, s );
3940         break;
3941
3942      default:
3943         VG_(tool_panic)("check_mem_is_addressable: unexpected CorePart");
3944      }
3945   }
3946}
3947
3948static
3949void check_mem_is_defined ( CorePart part, ThreadId tid, const HChar* s,
3950                            Addr base, SizeT size )
3951{
3952   UInt otag = 0;
3953   Addr bad_addr;
3954   MC_ReadResult res = is_mem_defined ( base, size, &bad_addr, &otag );
3955
3956   if (MC_Ok != res) {
3957      Bool isAddrErr = ( MC_AddrErr == res ? True : False );
3958
3959      switch (part) {
3960      case Vg_CoreSysCall:
3961         MC_(record_memparam_error) ( tid, bad_addr, isAddrErr, s,
3962                                      isAddrErr ? 0 : otag );
3963         break;
3964
3965      case Vg_CoreSysCallArgInMem:
3966         MC_(record_regparam_error) ( tid, s, otag );
3967         break;
3968
3969      /* If we're being asked to jump to a silly address, record an error
3970         message before potentially crashing the entire system. */
3971      case Vg_CoreTranslate:
3972         MC_(record_jump_error)( tid, bad_addr );
3973         break;
3974
3975      default:
3976         VG_(tool_panic)("check_mem_is_defined: unexpected CorePart");
3977      }
3978   }
3979}
3980
3981static
3982void check_mem_is_defined_asciiz ( CorePart part, ThreadId tid,
3983                                   const HChar* s, Addr str )
3984{
3985   MC_ReadResult res;
3986   Addr bad_addr = 0;   // shut GCC up
3987   UInt otag = 0;
3988
3989   tl_assert(part == Vg_CoreSysCall);
3990   res = mc_is_defined_asciiz ( (Addr)str, &bad_addr, &otag );
3991   if (MC_Ok != res) {
3992      Bool isAddrErr = ( MC_AddrErr == res ? True : False );
3993      MC_(record_memparam_error) ( tid, bad_addr, isAddrErr, s,
3994                                   isAddrErr ? 0 : otag );
3995   }
3996}
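
/* For reference (illustrative; the actual registration happens in this
   file's pre_clo_init routine): the three check_mem_* handlers above are
   hooked into the core roughly as

      VG_(track_pre_mem_write)        ( check_mem_is_addressable );
      VG_(track_pre_mem_read)         ( check_mem_is_defined );
      VG_(track_pre_mem_read_asciiz)  ( check_mem_is_defined_asciiz );

   and are reached from the syscall wrappers through the PRE_MEM_WRITE,
   PRE_MEM_READ and PRE_MEM_RASCIIZ macros respectively. */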
3997
3998/* Handling of mmap and mprotect is not as simple as it seems.
3999
   The underlying semantics are that memory obtained from mmap is
   always initialised, but may be inaccessible.  And changes to the
   protection of memory do not change its contents and hence do not
   change its definedness state.  The problem is that we can't model an
   inaccessible-but-with-some-definedness state; once we mark memory
   as inaccessible we lose all information about its definedness, and so
   can't restore it if the memory is later made accessible again.
4007
4008   One obvious thing to do is this:
4009
4010      mmap/mprotect NONE  -> noaccess
4011      mmap/mprotect other -> defined
4012
4013   The problem case here is: taking accessible memory, writing
4014   uninitialised data to it, mprotecting it NONE and later mprotecting
4015   it back to some accessible state causes the undefinedness to be
4016   lost.
4017
4018   A better proposal is:
4019
4020     (1) mmap NONE       ->  make noaccess
4021     (2) mmap other      ->  make defined
4022
4023     (3) mprotect NONE   ->  # no change
4024     (4) mprotect other  ->  change any "noaccess" to "defined"
4025
4026   (2) is OK because memory newly obtained from mmap really is defined
4027       (zeroed out by the kernel -- doing anything else would
4028       constitute a massive security hole.)
4029
4030   (1) is OK because the only way to make the memory usable is via
4031       (4), in which case we also wind up correctly marking it all as
4032       defined.
4033
   (3) is the weak case.  We choose not to change the memory state
       (presumably the range is in some mixture of "defined" and
4036       "undefined", viz, accessible but with arbitrary V bits).  Doing
4037       nothing means we retain the V bits, so that if the memory is
4038       later mprotected "other", the V bits remain unchanged, so there
4039       can be no false negatives.  The bad effect is that if there's
4040       an access in the area, then MC cannot warn; but at least we'll
4041       get a SEGV to show, so it's better than nothing.
4042
4043   Consider the sequence (3) followed by (4).  Any memory that was
4044   "defined" or "undefined" previously retains its state (as
4045   required).  Any memory that was "noaccess" before can only have
4046   been made that way by (1), and so it's OK to change it to
4047   "defined".
4048
4049   See https://bugs.kde.org/show_bug.cgi?id=205541
4050   and https://bugs.kde.org/show_bug.cgi?id=210268
4051*/
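/* Illustrative client-side sketch (not part of the implementation) of
   why (3)+(4) is the right choice:

      char* p = mmap(NULL, 4096, PROT_READ|PROT_WRITE,
                     MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
                                    // (2): page marked "defined"
      char c;                       // c is uninitialised
      p[0] = c;                     // p[0]'s V bits become "undefined"
      mprotect(p, 4096, PROT_NONE);
                                    // (3): no change; V bits are retained
      mprotect(p, 4096, PROT_READ|PROT_WRITE);
                                    // (4): only "noaccess" bytes would be
                                    //      flipped; p[0] stays "undefined"
      if (p[0] == 'x') { }          // use of the uninitialised value is
                                    //      still reported, as required
*/
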
4052static
4053void mc_new_mem_mmap ( Addr a, SizeT len, Bool rr, Bool ww, Bool xx,
4054                       ULong di_handle )
4055{
4056   if (rr || ww || xx) {
4057      /* (2) mmap/mprotect other -> defined */
4058      MC_(make_mem_defined)(a, len);
4059   } else {
4060      /* (1) mmap/mprotect NONE  -> noaccess */
4061      MC_(make_mem_noaccess)(a, len);
4062   }
4063}
4064
4065static
4066void mc_new_mem_mprotect ( Addr a, SizeT len, Bool rr, Bool ww, Bool xx )
4067{
4068   if (rr || ww || xx) {
4069      /* (4) mprotect other  ->  change any "noaccess" to "defined" */
4070      make_mem_defined_if_noaccess(a, len);
4071   } else {
4072      /* (3) mprotect NONE   ->  # no change */
4073      /* do nothing */
4074   }
4075}
4076
4077
4078static
4079void mc_new_mem_startup( Addr a, SizeT len,
4080                         Bool rr, Bool ww, Bool xx, ULong di_handle )
4081{
   // Everything is defined at startup: code is defined, initialised
   // variables get put in the data segment and are defined, and
   // uninitialised variables get put in the bss segment and are
   // auto-zeroed (and so defined).
4085   //
4086   // It's possible that there will be padding between global variables.
4087   // This will also be auto-zeroed, and marked as defined by Memcheck.  If
4088   // a program uses it, Memcheck will not complain.  This is arguably a
4089   // false negative, but it's a grey area -- the behaviour is defined (the
4090   // padding is zeroed) but it's probably not what the user intended.  And
4091   // we can't avoid it.
4092   //
4093   // Note: we generally ignore RWX permissions, because we can't track them
4094   // without requiring more than one A bit which would slow things down a
4095   // lot.  But on Darwin the 0th page is mapped but !R and !W and !X.
4096   // So we mark any such pages as "unaddressable".
4097   DEBUG("mc_new_mem_startup(%#lx, %llu, rr=%u, ww=%u, xx=%u)\n",
4098         a, (ULong)len, rr, ww, xx);
4099   mc_new_mem_mmap(a, len, rr, ww, xx, di_handle);
4100}
4101
4102static
4103void mc_post_mem_write(CorePart part, ThreadId tid, Addr a, SizeT len)
4104{
4105   MC_(make_mem_defined)(a, len);
4106}
4107
4108
4109/*------------------------------------------------------------*/
4110/*--- Register event handlers                              ---*/
4111/*------------------------------------------------------------*/
4112
/* Try to get a nonzero origin for the guest state section of thread
   tid characterised by (offset,size).  Return 0 if there is nothing to
   show for it. */
4116static UInt mb_get_origin_for_guest_offset ( ThreadId tid,
4117                                             Int offset, SizeT size )
4118{
4119   Int   sh2off;
4120   UInt  area[3];
4121   UInt  otag;
4122   sh2off = MC_(get_otrack_shadow_offset)( offset, size );
4123   if (sh2off == -1)
4124      return 0;  /* This piece of guest state is not tracked */
4125   tl_assert(sh2off >= 0);
4126   tl_assert(0 == (sh2off % 4));
4127   area[0] = 0x31313131;
4128   area[2] = 0x27272727;
4129   VG_(get_shadow_regs_area)( tid, (UChar *)&area[1], 2/*shadowno*/,sh2off,4 );
4130   tl_assert(area[0] == 0x31313131);
4131   tl_assert(area[2] == 0x27272727);
4132   otag = area[1];
4133   return otag;
4134}
4135
4136
4137/* When some chunk of guest state is written, mark the corresponding
   shadow area as valid.  This is used to initialise arbitrarily large
   chunks of guest state, hence the MAX_REG_WRITE_SIZE value, which has
   to be at least as big as the largest chunk of guest state written in
   one call.
4141*/
4142static void mc_post_reg_write ( CorePart part, ThreadId tid,
4143                                PtrdiffT offset, SizeT size)
4144{
4145#  define MAX_REG_WRITE_SIZE 1712
4146   UChar area[MAX_REG_WRITE_SIZE];
4147   tl_assert(size <= MAX_REG_WRITE_SIZE);
4148   VG_(memset)(area, V_BITS8_DEFINED, size);
4149   VG_(set_shadow_regs_area)( tid, 1/*shadowNo*/,offset,size, area );
4150#  undef MAX_REG_WRITE_SIZE
4151}
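
/* For reference (illustrative): mc_post_reg_write, its clientcall
   variant below, and mc_pre_reg_read further down are hooked into the
   core's register-tracking interface, roughly as

      VG_(track_post_reg_write) ( mc_post_reg_write );
      VG_(track_pre_reg_read)   ( mc_pre_reg_read );

   plus the corresponding clientcall-return hook for
   mc_post_reg_write_clientcall, so that writes to guest registers mark
   their shadow as defined, and guest registers used as syscall
   parameters are checked for definedness before use. */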
4152
4153static
4154void mc_post_reg_write_clientcall ( ThreadId tid,
4155                                    PtrdiffT offset, SizeT size, Addr f)
4156{
4157   mc_post_reg_write(/*dummy*/0, tid, offset, size);
4158}
4159
4160/* Look at the definedness of the guest's shadow state for
4161   [offset, offset+len).  If any part of that is undefined, record
4162   a parameter error.
4163*/
4164static void mc_pre_reg_read ( CorePart part, ThreadId tid, const HChar* s,
4165                              PtrdiffT offset, SizeT size)
4166{
4167   Int   i;
4168   Bool  bad;
4169   UInt  otag;
4170
4171   UChar area[16];
4172   tl_assert(size <= 16);
4173
4174   VG_(get_shadow_regs_area)( tid, area, 1/*shadowNo*/,offset,size );
4175
4176   bad = False;
4177   for (i = 0; i < size; i++) {
4178      if (area[i] != V_BITS8_DEFINED) {
4179         bad = True;
4180         break;
4181      }
4182   }
4183
4184   if (!bad)
4185      return;
4186
4187   /* We've found some undefinedness.  See if we can also find an
4188      origin for it. */
4189   otag = mb_get_origin_for_guest_offset( tid, offset, size );
4190   MC_(record_regparam_error) ( tid, s, otag );
4191}
4192
4193
4194/*------------------------------------------------------------*/
4195/*--- Functions called directly from generated code:       ---*/
4196/*--- Load/store handlers.                                 ---*/
4197/*------------------------------------------------------------*/
4198
4199/* Types:  LOADV32, LOADV16, LOADV8 are:
4200               UWord fn ( Addr a )
   so they return 32 bits on 32-bit machines and 64 bits on
   64-bit machines.  Addr has the same size as a host word.
4203
4204   LOADV64 is always  ULong fn ( Addr a )
4205
4206   Similarly for STOREV8, STOREV16, STOREV32, the supplied vbits
4207   are a UWord, and for STOREV64 they are a ULong.
4208*/
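
/* Illustrative sketch (simplified; the real work is in mc_translate.c):
   for every guest load or store, the instrumenter also emits a call to
   the matching helper below, operating on the V-bit shadow of the data.
   Conceptually, for a little-endian 32-bit access:

      UWord vbits = MC_(helperc_LOADV32le)( addr );    // shadow of a load
      ...
      MC_(helperc_STOREV32le)( addr, vbits );          // shadow of a store

   The helpers convert between the compact in-memory V+A representation
   and the expanded register form, and divert to the _slow routines for
   unaligned, unmapped or partially-defined cases. */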
4209
/* If any part of '_a' indicated by the mask is 1, either '_a' is not
   naturally '_szInBits/8'-aligned, or it exceeds the range covered by the
4212   primary map.  This is all very tricky (and important!), so let's
4213   work through the maths by hand (below), *and* assert for these
4214   values at startup. */
4215#define MASK(_szInBytes) \
4216   ( ~((0x10000UL-(_szInBytes)) | ((N_PRIMARY_MAP-1) << 16)) )
4217
4218/* MASK only exists so as to define this macro. */
4219#define UNALIGNED_OR_HIGH(_a,_szInBits) \
4220   ((_a) & MASK((_szInBits>>3)))
4221
4222/* On a 32-bit machine:
4223
4224   N_PRIMARY_BITS          == 16, so
4225   N_PRIMARY_MAP           == 0x10000, so
4226   N_PRIMARY_MAP-1         == 0xFFFF, so
4227   (N_PRIMARY_MAP-1) << 16 == 0xFFFF0000, and so
4228
4229   MASK(1) = ~ ( (0x10000 - 1) | 0xFFFF0000 )
4230           = ~ ( 0xFFFF | 0xFFFF0000 )
4231           = ~ 0xFFFF'FFFF
4232           = 0
4233
4234   MASK(2) = ~ ( (0x10000 - 2) | 0xFFFF0000 )
4235           = ~ ( 0xFFFE | 0xFFFF0000 )
4236           = ~ 0xFFFF'FFFE
4237           = 1
4238
4239   MASK(4) = ~ ( (0x10000 - 4) | 0xFFFF0000 )
4240           = ~ ( 0xFFFC | 0xFFFF0000 )
4241           = ~ 0xFFFF'FFFC
4242           = 3
4243
4244   MASK(8) = ~ ( (0x10000 - 8) | 0xFFFF0000 )
4245           = ~ ( 0xFFF8 | 0xFFFF0000 )
4246           = ~ 0xFFFF'FFF8
4247           = 7
4248
4249   Hence in the 32-bit case, "a & MASK(1/2/4/8)" is a nonzero value
4250   precisely when a is not 1/2/4/8-bytes aligned.  And obviously, for
4251   the 1-byte alignment case, it is always a zero value, since MASK(1)
4252   is zero.  All as expected.
4253
4254   On a 64-bit machine, it's more complex, since we're testing
4255   simultaneously for misalignment and for the address being at or
4256   above 64G:
4257
4258   N_PRIMARY_BITS          == 20, so
4259   N_PRIMARY_MAP           == 0x100000, so
4260   N_PRIMARY_MAP-1         == 0xFFFFF, so
4261   (N_PRIMARY_MAP-1) << 16 == 0xF'FFFF'0000, and so
4262
4263   MASK(1) = ~ ( (0x10000 - 1) | 0xF'FFFF'0000 )
4264           = ~ ( 0xFFFF | 0xF'FFFF'0000 )
4265           = ~ 0xF'FFFF'FFFF
4266           = 0xFFFF'FFF0'0000'0000
4267
4268   MASK(2) = ~ ( (0x10000 - 2) | 0xF'FFFF'0000 )
4269           = ~ ( 0xFFFE | 0xF'FFFF'0000 )
4270           = ~ 0xF'FFFF'FFFE
4271           = 0xFFFF'FFF0'0000'0001
4272
4273   MASK(4) = ~ ( (0x10000 - 4) | 0xF'FFFF'0000 )
4274           = ~ ( 0xFFFC | 0xF'FFFF'0000 )
4275           = ~ 0xF'FFFF'FFFC
4276           = 0xFFFF'FFF0'0000'0003
4277
4278   MASK(8) = ~ ( (0x10000 - 8) | 0xF'FFFF'0000 )
4279           = ~ ( 0xFFF8 | 0xF'FFFF'0000 )
4280           = ~ 0xF'FFFF'FFF8
4281           = 0xFFFF'FFF0'0000'0007
4282*/
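
/* A worked example of UNALIGNED_OR_HIGH on a 64-bit machine, using the
   values derived above (illustrative only):

   4-byte access at a = 0x7FFF1234:
      a & MASK(4) = 0x7FFF1234 & 0xFFFF'FFF0'0000'0003 = 0
      -> aligned and below 64G; the fast path is taken.

   4-byte access at a = 0x7FFF1236:
      a & MASK(4) = 2   (low bits dirty: not 4-aligned)
      -> the slow path is taken.

   4-byte access at a = 0x10'0000'0000 (exactly 64G):
      a & MASK(4) = 0x10'0000'0000   (high bits dirty)
      -> not covered by the primary map; the slow path is taken.
*/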
4283
4284
4285/* ------------------------ Size = 16 ------------------------ */
4286
4287static INLINE
4288void mc_LOADV_128_or_256 ( /*OUT*/ULong* res,
4289                           Addr a, SizeT nBits, Bool isBigEndian )
4290{
4291   PROF_EVENT(200, "mc_LOADV_128_or_256");
4292
4293#ifndef PERF_FAST_LOADV
4294   mc_LOADV_128_or_256_slow( res, a, nBits, isBigEndian );
4295   return;
4296#else
4297   {
4298      UWord   sm_off16, vabits16, j;
4299      UWord   nBytes  = nBits / 8;
4300      UWord   nULongs = nBytes / 8;
4301      SecMap* sm;
4302
4303      if (UNLIKELY( UNALIGNED_OR_HIGH(a,nBits) )) {
4304         PROF_EVENT(201, "mc_LOADV_128_or_256-slow1");
4305         mc_LOADV_128_or_256_slow( res, a, nBits, isBigEndian );
4306         return;
4307      }
4308
4309      /* Handle common cases quickly: a (and a+8 and a+16 etc.) is
         suitably aligned, is mapped, and addressable. */
4311      for (j = 0; j < nULongs; j++) {
4312         sm       = get_secmap_for_reading_low(a + 8*j);
4313         sm_off16 = SM_OFF_16(a + 8*j);
4314         vabits16 = ((UShort*)(sm->vabits8))[sm_off16];
4315
4316         // Convert V bits from compact memory form to expanded
4317         // register form.
4318         if (LIKELY(vabits16 == VA_BITS16_DEFINED)) {
4319            res[j] = V_BITS64_DEFINED;
4320         } else if (LIKELY(vabits16 == VA_BITS16_UNDEFINED)) {
4321            res[j] = V_BITS64_UNDEFINED;
4322         } else {
            /* Slow case: some block of 8 bytes is not all-defined or
4324               all-undefined. */
4325            PROF_EVENT(202, "mc_LOADV_128_or_256-slow2");
4326            mc_LOADV_128_or_256_slow( res, a, nBits, isBigEndian );
4327            return;
4328         }
4329      }
4330      return;
4331   }
4332#endif
4333}
4334
4335VG_REGPARM(2) void MC_(helperc_LOADV256be) ( /*OUT*/V256* res, Addr a )
4336{
4337   mc_LOADV_128_or_256(&res->w64[0], a, 256, True);
4338}
4339VG_REGPARM(2) void MC_(helperc_LOADV256le) ( /*OUT*/V256* res, Addr a )
4340{
4341   mc_LOADV_128_or_256(&res->w64[0], a, 256, False);
4342}
4343
4344VG_REGPARM(2) void MC_(helperc_LOADV128be) ( /*OUT*/V128* res, Addr a )
4345{
4346   mc_LOADV_128_or_256(&res->w64[0], a, 128, True);
4347}
4348VG_REGPARM(2) void MC_(helperc_LOADV128le) ( /*OUT*/V128* res, Addr a )
4349{
4350   mc_LOADV_128_or_256(&res->w64[0], a, 128, False);
4351}
4352
4353/* ------------------------ Size = 8 ------------------------ */
4354
4355static INLINE
4356ULong mc_LOADV64 ( Addr a, Bool isBigEndian )
4357{
4358   PROF_EVENT(200, "mc_LOADV64");
4359
4360#ifndef PERF_FAST_LOADV
4361   return mc_LOADVn_slow( a, 64, isBigEndian );
4362#else
4363   {
4364      UWord   sm_off16, vabits16;
4365      SecMap* sm;
4366
4367      if (UNLIKELY( UNALIGNED_OR_HIGH(a,64) )) {
4368         PROF_EVENT(201, "mc_LOADV64-slow1");
4369         return (ULong)mc_LOADVn_slow( a, 64, isBigEndian );
4370      }
4371
4372      sm       = get_secmap_for_reading_low(a);
4373      sm_off16 = SM_OFF_16(a);
4374      vabits16 = ((UShort*)(sm->vabits8))[sm_off16];
4375
4376      // Handle common case quickly: a is suitably aligned, is mapped, and
      // addressable.
4378      // Convert V bits from compact memory form to expanded register form.
4379      if (LIKELY(vabits16 == VA_BITS16_DEFINED)) {
4380         return V_BITS64_DEFINED;
4381      } else if (LIKELY(vabits16 == VA_BITS16_UNDEFINED)) {
4382         return V_BITS64_UNDEFINED;
4383      } else {
4384         /* Slow case: the 8 bytes are not all-defined or all-undefined. */
4385         PROF_EVENT(202, "mc_LOADV64-slow2");
4386         return mc_LOADVn_slow( a, 64, isBigEndian );
4387      }
4388   }
4389#endif
4390}
4391
4392VG_REGPARM(1) ULong MC_(helperc_LOADV64be) ( Addr a )
4393{
4394   return mc_LOADV64(a, True);
4395}
4396VG_REGPARM(1) ULong MC_(helperc_LOADV64le) ( Addr a )
4397{
4398   return mc_LOADV64(a, False);
4399}
4400
4401
4402static INLINE
4403void mc_STOREV64 ( Addr a, ULong vbits64, Bool isBigEndian )
4404{
4405   PROF_EVENT(210, "mc_STOREV64");
4406
4407#ifndef PERF_FAST_STOREV
4408   // XXX: this slow case seems to be marginally faster than the fast case!
4409   // Investigate further.
4410   mc_STOREVn_slow( a, 64, vbits64, isBigEndian );
4411#else
4412   {
4413      UWord   sm_off16, vabits16;
4414      SecMap* sm;
4415
4416      if (UNLIKELY( UNALIGNED_OR_HIGH(a,64) )) {
4417         PROF_EVENT(211, "mc_STOREV64-slow1");
4418         mc_STOREVn_slow( a, 64, vbits64, isBigEndian );
4419         return;
4420      }
4421
4422      sm       = get_secmap_for_reading_low(a);
4423      sm_off16 = SM_OFF_16(a);
4424      vabits16 = ((UShort*)(sm->vabits8))[sm_off16];
4425
4426      // To understand the below cleverness, see the extensive comments
4427      // in MC_(helperc_STOREV8).
4428      if (LIKELY(V_BITS64_DEFINED == vbits64)) {
4429         if (LIKELY(vabits16 == (UShort)VA_BITS16_DEFINED)) {
4430            return;
4431         }
4432         if (!is_distinguished_sm(sm) && VA_BITS16_UNDEFINED == vabits16) {
4433            ((UShort*)(sm->vabits8))[sm_off16] = (UShort)VA_BITS16_DEFINED;
4434            return;
4435         }
4436         PROF_EVENT(232, "mc_STOREV64-slow2");
4437         mc_STOREVn_slow( a, 64, vbits64, isBigEndian );
4438         return;
4439      }
4440      if (V_BITS64_UNDEFINED == vbits64) {
4441         if (vabits16 == (UShort)VA_BITS16_UNDEFINED) {
4442            return;
4443         }
4444         if (!is_distinguished_sm(sm) && VA_BITS16_DEFINED == vabits16) {
4445            ((UShort*)(sm->vabits8))[sm_off16] = (UShort)VA_BITS16_UNDEFINED;
4446            return;
4447         }
4448         PROF_EVENT(232, "mc_STOREV64-slow3");
4449         mc_STOREVn_slow( a, 64, vbits64, isBigEndian );
4450         return;
4451      }
4452
4453      PROF_EVENT(212, "mc_STOREV64-slow4");
4454      mc_STOREVn_slow( a, 64, vbits64, isBigEndian );
4455   }
4456#endif
4457}
4458
4459VG_REGPARM(1) void MC_(helperc_STOREV64be) ( Addr a, ULong vbits64 )
4460{
4461   mc_STOREV64(a, vbits64, True);
4462}
4463VG_REGPARM(1) void MC_(helperc_STOREV64le) ( Addr a, ULong vbits64 )
4464{
4465   mc_STOREV64(a, vbits64, False);
4466}
4467
4468
4469/* ------------------------ Size = 4 ------------------------ */
4470
4471static INLINE
4472UWord mc_LOADV32 ( Addr a, Bool isBigEndian )
4473{
4474   PROF_EVENT(220, "mc_LOADV32");
4475
4476#ifndef PERF_FAST_LOADV
4477   return (UWord)mc_LOADVn_slow( a, 32, isBigEndian );
4478#else
4479   {
4480      UWord   sm_off, vabits8;
4481      SecMap* sm;
4482
4483      if (UNLIKELY( UNALIGNED_OR_HIGH(a,32) )) {
4484         PROF_EVENT(221, "mc_LOADV32-slow1");
4485         return (UWord)mc_LOADVn_slow( a, 32, isBigEndian );
4486      }
4487
4488      sm      = get_secmap_for_reading_low(a);
4489      sm_off  = SM_OFF(a);
4490      vabits8 = sm->vabits8[sm_off];
4491
4492      // Handle common case quickly: a is suitably aligned, is mapped, and the
      // entire word32 it lives in is addressable.
4494      // Convert V bits from compact memory form to expanded register form.
4495      // For 64-bit platforms, set the high 32 bits of retval to 1 (undefined).
4496      // Almost certainly not necessary, but be paranoid.
4497      if (LIKELY(vabits8 == VA_BITS8_DEFINED)) {
4498         return ((UWord)0xFFFFFFFF00000000ULL | (UWord)V_BITS32_DEFINED);
4499      } else if (LIKELY(vabits8 == VA_BITS8_UNDEFINED)) {
4500         return ((UWord)0xFFFFFFFF00000000ULL | (UWord)V_BITS32_UNDEFINED);
4501      } else {
4502         /* Slow case: the 4 bytes are not all-defined or all-undefined. */
4503         PROF_EVENT(222, "mc_LOADV32-slow2");
4504         return (UWord)mc_LOADVn_slow( a, 32, isBigEndian );
4505      }
4506   }
4507#endif
4508}
4509
4510VG_REGPARM(1) UWord MC_(helperc_LOADV32be) ( Addr a )
4511{
4512   return mc_LOADV32(a, True);
4513}
4514VG_REGPARM(1) UWord MC_(helperc_LOADV32le) ( Addr a )
4515{
4516   return mc_LOADV32(a, False);
4517}
4518
4519
4520static INLINE
4521void mc_STOREV32 ( Addr a, UWord vbits32, Bool isBigEndian )
4522{
4523   PROF_EVENT(230, "mc_STOREV32");
4524
4525#ifndef PERF_FAST_STOREV
4526   mc_STOREVn_slow( a, 32, (ULong)vbits32, isBigEndian );
4527#else
4528   {
4529      UWord   sm_off, vabits8;
4530      SecMap* sm;
4531
4532      if (UNLIKELY( UNALIGNED_OR_HIGH(a,32) )) {
4533         PROF_EVENT(231, "mc_STOREV32-slow1");
4534         mc_STOREVn_slow( a, 32, (ULong)vbits32, isBigEndian );
4535         return;
4536      }
4537
4538      sm      = get_secmap_for_reading_low(a);
4539      sm_off  = SM_OFF(a);
4540      vabits8 = sm->vabits8[sm_off];
4541
4542      // To understand the below cleverness, see the extensive comments
4543      // in MC_(helperc_STOREV8).
4544      if (LIKELY(V_BITS32_DEFINED == vbits32)) {
4545         if (LIKELY(vabits8 == (UInt)VA_BITS8_DEFINED)) {
4546            return;
4547         }
4548         if (!is_distinguished_sm(sm)  && VA_BITS8_UNDEFINED == vabits8) {
4549            sm->vabits8[sm_off] = (UInt)VA_BITS8_DEFINED;
4550            return;
4551         }
4552         PROF_EVENT(232, "mc_STOREV32-slow2");
4553         mc_STOREVn_slow( a, 32, (ULong)vbits32, isBigEndian );
4554         return;
4555      }
4556      if (V_BITS32_UNDEFINED == vbits32) {
4557         if (vabits8 == (UInt)VA_BITS8_UNDEFINED) {
4558            return;
4559         }
4560         if (!is_distinguished_sm(sm) && VA_BITS8_DEFINED == vabits8) {
4561            sm->vabits8[sm_off] = (UInt)VA_BITS8_UNDEFINED;
4562            return;
4563         }
4564         PROF_EVENT(233, "mc_STOREV32-slow3");
4565         mc_STOREVn_slow( a, 32, (ULong)vbits32, isBigEndian );
4566         return;
4567      }
4568
4569      PROF_EVENT(234, "mc_STOREV32-slow4");
4570      mc_STOREVn_slow( a, 32, (ULong)vbits32, isBigEndian );
4571   }
4572#endif
4573}
4574
4575VG_REGPARM(2) void MC_(helperc_STOREV32be) ( Addr a, UWord vbits32 )
4576{
4577   mc_STOREV32(a, vbits32, True);
4578}
4579VG_REGPARM(2) void MC_(helperc_STOREV32le) ( Addr a, UWord vbits32 )
4580{
4581   mc_STOREV32(a, vbits32, False);
4582}
4583
4584
4585/* ------------------------ Size = 2 ------------------------ */
4586
4587static INLINE
4588UWord mc_LOADV16 ( Addr a, Bool isBigEndian )
4589{
4590   PROF_EVENT(240, "mc_LOADV16");
4591
4592#ifndef PERF_FAST_LOADV
4593   return (UWord)mc_LOADVn_slow( a, 16, isBigEndian );
4594#else
4595   {
4596      UWord   sm_off, vabits8;
4597      SecMap* sm;
4598
4599      if (UNLIKELY( UNALIGNED_OR_HIGH(a,16) )) {
4600         PROF_EVENT(241, "mc_LOADV16-slow1");
4601         return (UWord)mc_LOADVn_slow( a, 16, isBigEndian );
4602      }
4603
4604      sm      = get_secmap_for_reading_low(a);
4605      sm_off  = SM_OFF(a);
4606      vabits8 = sm->vabits8[sm_off];
4607      // Handle common case quickly: a is suitably aligned, is mapped, and is
      // addressable.
4609      // Convert V bits from compact memory form to expanded register form
4610      if      (LIKELY(vabits8 == VA_BITS8_DEFINED  )) { return V_BITS16_DEFINED;   }
4611      else if (LIKELY(vabits8 == VA_BITS8_UNDEFINED)) { return V_BITS16_UNDEFINED; }
4612      else {
4613         // The 4 (yes, 4) bytes are not all-defined or all-undefined, check
4614         // the two sub-bytes.
4615         UChar vabits4 = extract_vabits4_from_vabits8(a, vabits8);
4616         if      (vabits4 == VA_BITS4_DEFINED  ) { return V_BITS16_DEFINED;   }
4617         else if (vabits4 == VA_BITS4_UNDEFINED) { return V_BITS16_UNDEFINED; }
4618         else {
4619            /* Slow case: the two bytes are not all-defined or all-undefined. */
4620            PROF_EVENT(242, "mc_LOADV16-slow2");
4621            return (UWord)mc_LOADVn_slow( a, 16, isBigEndian );
4622         }
4623      }
4624   }
4625#endif
4626}
4627
4628VG_REGPARM(1) UWord MC_(helperc_LOADV16be) ( Addr a )
4629{
4630   return mc_LOADV16(a, True);
4631}
4632VG_REGPARM(1) UWord MC_(helperc_LOADV16le) ( Addr a )
4633{
4634   return mc_LOADV16(a, False);
4635}
4636
4637/* True if the vabits4 in vabits8 indicate a and a+1 are accessible. */
4638static INLINE
4639Bool accessible_vabits4_in_vabits8 ( Addr a, UChar vabits8 )
4640{
4641   UInt shift;
4642   tl_assert(VG_IS_2_ALIGNED(a));      // Must be 2-aligned
4643   shift = (a & 2) << 1;               // shift by 0 or 4
4644   vabits8 >>= shift;                  // shift the four bits to the bottom
   // check 2 x vabits2 != VA_BITS2_NOACCESS
4646   return ((0x3 & vabits8) != VA_BITS2_NOACCESS)
4647      &&  ((0xc & vabits8) != VA_BITS2_NOACCESS << 2);
4648}
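
/* Worked example (illustrative; vabits2 encoding as defined earlier in
   this file: 00 = noaccess, 01 = undefined, 10 = defined):

   Suppose vabits8 == 10_10_10_00b, i.e. within this group of 4 bytes the
   byte at offset 0 is noaccess and the other three are defined.

   - For a 2-aligned 'a' with (a & 2) == 0: shift == 0, so the two low
     fields 10_00b are tested; the offset-0 field is 00 (noaccess), so
     the function returns False.

   - For a 2-aligned 'a' with (a & 2) == 2: shift == 4, so the two high
     fields 10_10b are tested; neither is noaccess, so the function
     returns True. */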
4649
4650static INLINE
4651void mc_STOREV16 ( Addr a, UWord vbits16, Bool isBigEndian )
4652{
4653   PROF_EVENT(250, "mc_STOREV16");
4654
4655#ifndef PERF_FAST_STOREV
4656   mc_STOREVn_slow( a, 16, (ULong)vbits16, isBigEndian );
4657#else
4658   {
4659      UWord   sm_off, vabits8;
4660      SecMap* sm;
4661
4662      if (UNLIKELY( UNALIGNED_OR_HIGH(a,16) )) {
4663         PROF_EVENT(251, "mc_STOREV16-slow1");
4664         mc_STOREVn_slow( a, 16, (ULong)vbits16, isBigEndian );
4665         return;
4666      }
4667
4668      sm      = get_secmap_for_reading_low(a);
4669      sm_off  = SM_OFF(a);
4670      vabits8 = sm->vabits8[sm_off];
4671
4672      // To understand the below cleverness, see the extensive comments
4673      // in MC_(helperc_STOREV8).
4674      if (LIKELY(V_BITS16_DEFINED == vbits16)) {
4675         if (LIKELY(vabits8 == VA_BITS8_DEFINED)) {
4676            return;
4677         }
4678         if (!is_distinguished_sm(sm)
4679             && accessible_vabits4_in_vabits8(a, vabits8)) {
4680            insert_vabits4_into_vabits8( a, VA_BITS4_DEFINED,
4681                                         &(sm->vabits8[sm_off]) );
4682            return;
4683         }
4684         PROF_EVENT(232, "mc_STOREV16-slow2");
         mc_STOREVn_slow( a, 16, (ULong)vbits16, isBigEndian );
         return;
      }
4687      if (V_BITS16_UNDEFINED == vbits16) {
4688         if (vabits8 == VA_BITS8_UNDEFINED) {
4689            return;
4690         }
4691         if (!is_distinguished_sm(sm)
4692             && accessible_vabits4_in_vabits8(a, vabits8)) {
4693            insert_vabits4_into_vabits8( a, VA_BITS4_UNDEFINED,
4694                                         &(sm->vabits8[sm_off]) );
4695            return;
4696         }
4697         PROF_EVENT(233, "mc_STOREV16-slow3");
4698         mc_STOREVn_slow( a, 16, (ULong)vbits16, isBigEndian );
4699         return;
4700      }
4701
4702      PROF_EVENT(234, "mc_STOREV16-slow4");
4703      mc_STOREVn_slow( a, 16, (ULong)vbits16, isBigEndian );
4704   }
4705#endif
4706}
4707
4708VG_REGPARM(2) void MC_(helperc_STOREV16be) ( Addr a, UWord vbits16 )
4709{
4710   mc_STOREV16(a, vbits16, True);
4711}
4712VG_REGPARM(2) void MC_(helperc_STOREV16le) ( Addr a, UWord vbits16 )
4713{
4714   mc_STOREV16(a, vbits16, False);
4715}
4716
4717
4718/* ------------------------ Size = 1 ------------------------ */
4719/* Note: endianness is irrelevant for size == 1 */
4720
4721VG_REGPARM(1)
4722UWord MC_(helperc_LOADV8) ( Addr a )
4723{
4724   PROF_EVENT(260, "mc_LOADV8");
4725
4726#ifndef PERF_FAST_LOADV
4727   return (UWord)mc_LOADVn_slow( a, 8, False/*irrelevant*/ );
4728#else
4729   {
4730      UWord   sm_off, vabits8;
4731      SecMap* sm;
4732
4733      if (UNLIKELY( UNALIGNED_OR_HIGH(a,8) )) {
4734         PROF_EVENT(261, "mc_LOADV8-slow1");
4735         return (UWord)mc_LOADVn_slow( a, 8, False/*irrelevant*/ );
4736      }
4737
4738      sm      = get_secmap_for_reading_low(a);
4739      sm_off  = SM_OFF(a);
4740      vabits8 = sm->vabits8[sm_off];
4741      // Convert V bits from compact memory form to expanded register form
4742      // Handle common case quickly: a is mapped, and the entire
      // word32 it lives in is addressable.
4744      if      (LIKELY(vabits8 == VA_BITS8_DEFINED  )) { return V_BITS8_DEFINED;   }
4745      else if (LIKELY(vabits8 == VA_BITS8_UNDEFINED)) { return V_BITS8_UNDEFINED; }
4746      else {
4747         // The 4 (yes, 4) bytes are not all-defined or all-undefined, check
4748         // the single byte.
4749         UChar vabits2 = extract_vabits2_from_vabits8(a, vabits8);
4750         if      (vabits2 == VA_BITS2_DEFINED  ) { return V_BITS8_DEFINED;   }
4751         else if (vabits2 == VA_BITS2_UNDEFINED) { return V_BITS8_UNDEFINED; }
4752         else {
4753            /* Slow case: the byte is not all-defined or all-undefined. */
4754            PROF_EVENT(262, "mc_LOADV8-slow2");
4755            return (UWord)mc_LOADVn_slow( a, 8, False/*irrelevant*/ );
4756         }
4757      }
4758   }
4759#endif
4760}
4761
4762
4763VG_REGPARM(2)
4764void MC_(helperc_STOREV8) ( Addr a, UWord vbits8 )
4765{
4766   PROF_EVENT(270, "mc_STOREV8");
4767
4768#ifndef PERF_FAST_STOREV
4769   mc_STOREVn_slow( a, 8, (ULong)vbits8, False/*irrelevant*/ );
4770#else
4771   {
4772      UWord   sm_off, vabits8;
4773      SecMap* sm;
4774
4775      if (UNLIKELY( UNALIGNED_OR_HIGH(a,8) )) {
4776         PROF_EVENT(271, "mc_STOREV8-slow1");
4777         mc_STOREVn_slow( a, 8, (ULong)vbits8, False/*irrelevant*/ );
4778         return;
4779      }
4780
4781      sm      = get_secmap_for_reading_low(a);
4782      sm_off  = SM_OFF(a);
4783      vabits8 = sm->vabits8[sm_off];
4784
4785      // Clevernesses to speed up storing V bits.
      // The 64/32/16 bit cases also have similar clevernesses, but they
      // work a little differently from the code below.
4788      //
4789      // Cleverness 1:  sometimes we don't have to write the shadow memory at
4790      // all, if we can tell that what we want to write is the same as what is
4791      // already there. These cases are marked below as "defined on defined" and
4792      // "undefined on undefined".
4793      //
4794      // Cleverness 2:
      // We also avoid calling mc_STOREVn_slow if the V bits can be written
      // directly into the secondary map.  V bits can be written directly
      // if four conditions hold:
4798      //   * The address for which V bits are written is naturally aligned
4799      //        on 1 byte  for STOREV8 (this is always true)
4800      //        on 2 bytes for STOREV16
4801      //        on 4 bytes for STOREV32
4802      //        on 8 bytes for STOREV64.
4803      //   * V bits being written are either fully defined or fully undefined.
4804      //     (for partially defined V bits, V bits cannot be directly written,
4805      //      as the secondary vbits table must be maintained).
4806      //   * the secmap is not distinguished (distinguished maps cannot be
4807      //     modified).
4808      //   * the memory corresponding to the V bits being written is
4809      //     accessible (if one or more bytes are not accessible,
4810      //     we must call mc_STOREVn_slow in order to report accessibility
4811      //     errors).
      //     Note that for STOREV32 and STOREV64, verifying the
      //     accessibility of each byte is too expensive for the benefit
      //     it brings.  Instead, a quicker check is done by comparing
      //     against VA_BITS(8|16)_(UN)DEFINED.  This guarantees
      //     accessibility, but misses some opportunities for direct
      //     modification.  Checking each byte's accessibility was tried
      //     for STOREV32 and measured to slow down all the perf tests.
4819      // The cases corresponding to cleverness 2 are marked below as
4820      // "direct mod".
4821      if (LIKELY(V_BITS8_DEFINED == vbits8)) {
4822         if (LIKELY(vabits8 == VA_BITS8_DEFINED)) {
4823            return; // defined on defined
4824         }
4825         if (!is_distinguished_sm(sm)
4826             && VA_BITS2_NOACCESS != extract_vabits2_from_vabits8(a, vabits8)) {
4827            // direct mod
4828            insert_vabits2_into_vabits8( a, VA_BITS2_DEFINED,
4829                                         &(sm->vabits8[sm_off]) );
4830            return;
4831         }
4832         PROF_EVENT(232, "mc_STOREV8-slow2");
4833         mc_STOREVn_slow( a, 8, (ULong)vbits8, False/*irrelevant*/ );
4834         return;
4835      }
4836      if (V_BITS8_UNDEFINED == vbits8) {
4837         if (vabits8 == VA_BITS8_UNDEFINED) {
4838            return; // undefined on undefined
4839         }
4840         if (!is_distinguished_sm(sm)
4841             && (VA_BITS2_NOACCESS
4842                 != extract_vabits2_from_vabits8(a, vabits8))) {
4843            // direct mod
4844            insert_vabits2_into_vabits8( a, VA_BITS2_UNDEFINED,
4845                                         &(sm->vabits8[sm_off]) );
4846            return;
4847         }
4848         PROF_EVENT(233, "mc_STOREV8-slow3");
4849         mc_STOREVn_slow( a, 8, (ULong)vbits8, False/*irrelevant*/ );
4850         return;
4851      }
4852
4853      // Partially defined word
4854      PROF_EVENT(234, "mc_STOREV8-slow4");
4855      mc_STOREVn_slow( a, 8, (ULong)vbits8, False/*irrelevant*/ );
4856   }
4857#endif
4858}
4859
4860
4861/*------------------------------------------------------------*/
4862/*--- Functions called directly from generated code:       ---*/
4863/*--- Value-check failure handlers.                        ---*/
4864/*------------------------------------------------------------*/
4865
4866/* Call these ones when an origin is available ... */
4867VG_REGPARM(1)
4868void MC_(helperc_value_check0_fail_w_o) ( UWord origin ) {
4869   MC_(record_cond_error) ( VG_(get_running_tid)(), (UInt)origin );
4870}
4871
4872VG_REGPARM(1)
4873void MC_(helperc_value_check1_fail_w_o) ( UWord origin ) {
4874   MC_(record_value_error) ( VG_(get_running_tid)(), 1, (UInt)origin );
4875}
4876
4877VG_REGPARM(1)
4878void MC_(helperc_value_check4_fail_w_o) ( UWord origin ) {
4879   MC_(record_value_error) ( VG_(get_running_tid)(), 4, (UInt)origin );
4880}
4881
4882VG_REGPARM(1)
4883void MC_(helperc_value_check8_fail_w_o) ( UWord origin ) {
4884   MC_(record_value_error) ( VG_(get_running_tid)(), 8, (UInt)origin );
4885}
4886
4887VG_REGPARM(2)
4888void MC_(helperc_value_checkN_fail_w_o) ( HWord sz, UWord origin ) {
4889   MC_(record_value_error) ( VG_(get_running_tid)(), (Int)sz, (UInt)origin );
4890}
4891
4892/* ... and these when an origin isn't available. */
4893
4894VG_REGPARM(0)
4895void MC_(helperc_value_check0_fail_no_o) ( void ) {
4896   MC_(record_cond_error) ( VG_(get_running_tid)(), 0/*origin*/ );
4897}
4898
4899VG_REGPARM(0)
4900void MC_(helperc_value_check1_fail_no_o) ( void ) {
4901   MC_(record_value_error) ( VG_(get_running_tid)(), 1, 0/*origin*/ );
4902}
4903
4904VG_REGPARM(0)
4905void MC_(helperc_value_check4_fail_no_o) ( void ) {
4906   MC_(record_value_error) ( VG_(get_running_tid)(), 4, 0/*origin*/ );
4907}
4908
4909VG_REGPARM(0)
4910void MC_(helperc_value_check8_fail_no_o) ( void ) {
4911   MC_(record_value_error) ( VG_(get_running_tid)(), 8, 0/*origin*/ );
4912}
4913
4914VG_REGPARM(1)
4915void MC_(helperc_value_checkN_fail_no_o) ( HWord sz ) {
4916   MC_(record_value_error) ( VG_(get_running_tid)(), (Int)sz, 0/*origin*/ );
4917}
4918
4919
4920/*------------------------------------------------------------*/
4921/*--- Metadata get/set functions, for client requests.     ---*/
4922/*------------------------------------------------------------*/
4923
4924// Nb: this expands the V+A bits out into register-form V bits, even though
4925// they're in memory.  This is for backward compatibility, and because it's
4926// probably what the user wants.
4927
4928/* Copy Vbits from/to address 'a'. Returns: 1 == OK, 2 == alignment
4929   error [no longer used], 3 == addressing error. */
4930/* Nb: We used to issue various definedness/addressability errors from here,
4931   but we took them out because they ranged from not-very-helpful to
4932   downright annoying, and they complicated the error data structures. */
4933static Int mc_get_or_set_vbits_for_client (
4934   Addr a,
4935   Addr vbits,
4936   SizeT szB,
4937   Bool setting, /* True <=> set vbits,  False <=> get vbits */
4938   Bool is_client_request /* True <=> real user request
4939                             False <=> internal call from gdbserver */
4940)
4941{
4942   SizeT i;
4943   Bool  ok;
4944   UChar vbits8;
4945
   /* Check that the arrays are addressable before doing any getting/setting.
      The vbits array is checked only for a real user request. */
4948   for (i = 0; i < szB; i++) {
4949      if (VA_BITS2_NOACCESS == get_vabits2(a + i) ||
4950          (is_client_request && VA_BITS2_NOACCESS == get_vabits2(vbits + i))) {
4951         return 3;
4952      }
4953   }
4954
4955   /* Do the copy */
4956   if (setting) {
4957      /* setting */
4958      for (i = 0; i < szB; i++) {
4959         ok = set_vbits8(a + i, ((UChar*)vbits)[i]);
4960         tl_assert(ok);
4961      }
4962   } else {
4963      /* getting */
4964      for (i = 0; i < szB; i++) {
4965         ok = get_vbits8(a + i, &vbits8);
4966         tl_assert(ok);
4967         ((UChar*)vbits)[i] = vbits8;
4968      }
4969      if (is_client_request)
4970        // The bytes in vbits[] have now been set, so mark them as such.
4971        MC_(make_mem_defined)(vbits, szB);
4972   }
4973
4974   return 1;
4975}
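
/* Illustrative client-side usage (assumed; see memcheck.h for the exact
   macro definitions).  This function backs the VALGRIND_GET_VBITS and
   VALGRIND_SET_VBITS client requests, roughly:

      unsigned char buf[8], vbits[8];
      ...
      int r = VALGRIND_GET_VBITS(buf, vbits, 8);
      // r == 1 : vbits[] now holds one V-bit byte per byte of buf
      //          (0x00 = fully defined, 0xFF = fully undefined)
      // r == 3 : some part of buf[] or vbits[] was unaddressable

   It is also called, with is_client_request == False, by the gdbserver
   "get_vbits" monitor command (see handle_gdb_monitor_command below). */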
4976
4977
4978/*------------------------------------------------------------*/
4979/*--- Detecting leaked (unreachable) malloc'd blocks.      ---*/
4980/*------------------------------------------------------------*/
4981
4982/* For the memory leak detector, say whether an entire 64k chunk of
4983   address space is possibly in use, or not.  If in doubt return
4984   True.
4985*/
4986Bool MC_(is_within_valid_secondary) ( Addr a )
4987{
4988   SecMap* sm = maybe_get_secmap_for ( a );
4989   if (sm == NULL || sm == &sm_distinguished[SM_DIST_NOACCESS]) {
4990      /* Definitely not in use. */
4991      return False;
4992   } else {
4993      return True;
4994   }
4995}
4996
4997
4998/* For the memory leak detector, say whether or not a given word
4999   address is to be regarded as valid. */
5000Bool MC_(is_valid_aligned_word) ( Addr a )
5001{
5002   tl_assert(sizeof(UWord) == 4 || sizeof(UWord) == 8);
5003   tl_assert(VG_IS_WORD_ALIGNED(a));
5004   if (get_vabits8_for_aligned_word32 (a) != VA_BITS8_DEFINED)
5005      return False;
5006   if (sizeof(UWord) == 8) {
5007      if (get_vabits8_for_aligned_word32 (a + 4) != VA_BITS8_DEFINED)
5008         return False;
5009   }
5010   if (UNLIKELY(MC_(in_ignored_range)(a)))
5011      return False;
5012   else
5013      return True;
5014}
5015
5016
5017/*------------------------------------------------------------*/
5018/*--- Initialisation                                       ---*/
5019/*------------------------------------------------------------*/
5020
5021static void init_shadow_memory ( void )
5022{
5023   Int     i;
5024   SecMap* sm;
5025
5026   tl_assert(V_BIT_UNDEFINED   == 1);
5027   tl_assert(V_BIT_DEFINED     == 0);
5028   tl_assert(V_BITS8_UNDEFINED == 0xFF);
5029   tl_assert(V_BITS8_DEFINED   == 0);
5030
5031   /* Build the 3 distinguished secondaries */
5032   sm = &sm_distinguished[SM_DIST_NOACCESS];
5033   for (i = 0; i < SM_CHUNKS; i++) sm->vabits8[i] = VA_BITS8_NOACCESS;
5034
5035   sm = &sm_distinguished[SM_DIST_UNDEFINED];
5036   for (i = 0; i < SM_CHUNKS; i++) sm->vabits8[i] = VA_BITS8_UNDEFINED;
5037
5038   sm = &sm_distinguished[SM_DIST_DEFINED];
5039   for (i = 0; i < SM_CHUNKS; i++) sm->vabits8[i] = VA_BITS8_DEFINED;
5040
5041   /* Set up the primary map. */
5042   /* These entries gradually get overwritten as the used address
5043      space expands. */
5044   for (i = 0; i < N_PRIMARY_MAP; i++)
5045      primary_map[i] = &sm_distinguished[SM_DIST_NOACCESS];
5046
5047   /* Auxiliary primary maps */
5048   init_auxmap_L1_L2();
5049
5050   /* auxmap_size = auxmap_used = 0;
      not needed; these are statically initialised */
5052
5053   /* Secondary V bit table */
5054   secVBitTable = createSecVBitTable();
5055}
5056
5057
5058/*------------------------------------------------------------*/
5059/*--- Sanity check machinery (permanently engaged)         ---*/
5060/*------------------------------------------------------------*/
5061
5062static Bool mc_cheap_sanity_check ( void )
5063{
5064   n_sanity_cheap++;
5065   PROF_EVENT(490, "cheap_sanity_check");
5066   /* Check for sane operating level */
5067   if (MC_(clo_mc_level) < 1 || MC_(clo_mc_level) > 3)
5068      return False;
5069   /* nothing else useful we can rapidly check */
5070   return True;
5071}
5072
5073static Bool mc_expensive_sanity_check ( void )
5074{
5075   Int     i;
5076   Word    n_secmaps_found;
5077   SecMap* sm;
5078   const HChar*  errmsg;
5079   Bool    bad = False;
5080
5081   if (0) VG_(printf)("expensive sanity check\n");
5082   if (0) return True;
5083
5084   n_sanity_expensive++;
5085   PROF_EVENT(491, "expensive_sanity_check");
5086
5087   /* Check for sane operating level */
5088   if (MC_(clo_mc_level) < 1 || MC_(clo_mc_level) > 3)
5089      return False;
5090
5091   /* Check that the 3 distinguished SMs are still as they should be. */
5092
5093   /* Check noaccess DSM. */
5094   sm = &sm_distinguished[SM_DIST_NOACCESS];
5095   for (i = 0; i < SM_CHUNKS; i++)
5096      if (sm->vabits8[i] != VA_BITS8_NOACCESS)
5097         bad = True;
5098
5099   /* Check undefined DSM. */
5100   sm = &sm_distinguished[SM_DIST_UNDEFINED];
5101   for (i = 0; i < SM_CHUNKS; i++)
5102      if (sm->vabits8[i] != VA_BITS8_UNDEFINED)
5103         bad = True;
5104
5105   /* Check defined DSM. */
5106   sm = &sm_distinguished[SM_DIST_DEFINED];
5107   for (i = 0; i < SM_CHUNKS; i++)
5108      if (sm->vabits8[i] != VA_BITS8_DEFINED)
5109         bad = True;
5110
5111   if (bad) {
5112      VG_(printf)("memcheck expensive sanity: "
5113                  "distinguished_secondaries have changed\n");
5114      return False;
5115   }
5116
5117   /* If we're not checking for undefined value errors, the secondary V bit
      table should be empty. */
5119   if (MC_(clo_mc_level) == 1) {
5120      if (0 != VG_(OSetGen_Size)(secVBitTable))
5121         return False;
5122   }
5123
5124   /* check the auxiliary maps, very thoroughly */
5125   n_secmaps_found = 0;
5126   errmsg = check_auxmap_L1_L2_sanity( &n_secmaps_found );
5127   if (errmsg) {
5128      VG_(printf)("memcheck expensive sanity, auxmaps:\n\t%s", errmsg);
5129      return False;
5130   }
5131
5132   /* n_secmaps_found is now the number referred to by the auxiliary
5133      primary map.  Now add on the ones referred to by the main
5134      primary map. */
5135   for (i = 0; i < N_PRIMARY_MAP; i++) {
5136      if (primary_map[i] == NULL) {
5137         bad = True;
5138      } else {
5139         if (!is_distinguished_sm(primary_map[i]))
5140            n_secmaps_found++;
5141      }
5142   }
5143
5144   /* check that the number of secmaps issued matches the number that
5145      are reachable (iow, no secmap leaks) */
5146   if (n_secmaps_found != (n_issued_SMs - n_deissued_SMs))
5147      bad = True;
5148
5149   if (bad) {
5150      VG_(printf)("memcheck expensive sanity: "
5151                  "apparent secmap leakage\n");
5152      return False;
5153   }
5154
5155   if (bad) {
5156      VG_(printf)("memcheck expensive sanity: "
5157                  "auxmap covers wrong address space\n");
5158      return False;
5159   }
5160
5161   /* there is only one pointer to each secmap (expensive) */
5162
5163   return True;
5164}
5165
5166/*------------------------------------------------------------*/
5167/*--- Command line args                                    ---*/
5168/*------------------------------------------------------------*/
5169
5170/* --partial-loads-ok: enable by default on MacOS.  The MacOS system
5171   graphics libraries are heavily vectorised, and not enabling this by
5172   default causes lots of false errors. */
5173#if defined(VGO_darwin)
5174Bool          MC_(clo_partial_loads_ok)       = True;
5175#else
5176Bool          MC_(clo_partial_loads_ok)       = False;
5177#endif
5178
5179Long          MC_(clo_freelist_vol)           = 20*1000*1000LL;
5180Long          MC_(clo_freelist_big_blocks)    =  1*1000*1000LL;
5181LeakCheckMode MC_(clo_leak_check)             = LC_Summary;
5182VgRes         MC_(clo_leak_resolution)        = Vg_HighRes;
5183UInt          MC_(clo_show_leak_kinds)        = R2S(Possible) | R2S(Unreached);
5184UInt          MC_(clo_error_for_leak_kinds)   = R2S(Possible) | R2S(Unreached);
5185UInt          MC_(clo_leak_check_heuristics)  = 0;
5186Bool          MC_(clo_workaround_gcc296_bugs) = False;
5187Int           MC_(clo_malloc_fill)            = -1;
5188Int           MC_(clo_free_fill)              = -1;
5189KeepStacktraces MC_(clo_keep_stacktraces)     = KS_alloc_then_free;
5190Int           MC_(clo_mc_level)               = 2;
5191Bool          MC_(clo_show_mismatched_frees)  = True;
5192
5193static const HChar * MC_(parse_leak_heuristics_tokens) =
5194   "-,stdstring,length64,newarray,multipleinheritance";
5195/* The first heuristic value (LchNone) has no keyword, as this is
5196   a fake heuristic used to collect the blocks found without any
5197   heuristic. */
5198
5199static Bool mc_process_cmd_line_options(const HChar* arg)
5200{
5201   const HChar* tmp_str;
5202   Int   tmp_show;
5203
5204   tl_assert( MC_(clo_mc_level) >= 1 && MC_(clo_mc_level) <= 3 );
5205
5206   /* Set MC_(clo_mc_level):
5207         1 = A bit tracking only
5208         2 = A and V bit tracking, but no V bit origins
5209         3 = A and V bit tracking, and V bit origins
5210
5211      Do this by inspecting --undef-value-errors= and
5212      --track-origins=.  Reject the case --undef-value-errors=no
5213      --track-origins=yes as meaningless.
5214   */
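
   /* For reference, the net effect of the common combinations handled
      just below:

         (neither option given)                        -> level 2 (default)
         --undef-value-errors=no                       -> level 1
         --track-origins=yes                           -> level 3
         --undef-value-errors=no --track-origins=yes   -> rejected
   */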
5215   if (0 == VG_(strcmp)(arg, "--undef-value-errors=no")) {
5216      if (MC_(clo_mc_level) == 3) {
5217         goto bad_level;
5218      } else {
5219         MC_(clo_mc_level) = 1;
5220         return True;
5221      }
5222   }
5223   if (0 == VG_(strcmp)(arg, "--undef-value-errors=yes")) {
5224      if (MC_(clo_mc_level) == 1)
5225         MC_(clo_mc_level) = 2;
5226      return True;
5227   }
5228   if (0 == VG_(strcmp)(arg, "--track-origins=no")) {
5229      if (MC_(clo_mc_level) == 3)
5230         MC_(clo_mc_level) = 2;
5231      return True;
5232   }
5233   if (0 == VG_(strcmp)(arg, "--track-origins=yes")) {
5234      if (MC_(clo_mc_level) == 1) {
5235         goto bad_level;
5236      } else {
5237         MC_(clo_mc_level) = 3;
5238         return True;
5239      }
5240   }
5241
5242        if VG_BOOL_CLO(arg, "--partial-loads-ok", MC_(clo_partial_loads_ok)) {}
5243   else if VG_USET_CLO(arg, "--errors-for-leak-kinds",
5244                       MC_(parse_leak_kinds_tokens),
5245                       MC_(clo_error_for_leak_kinds)) {}
5246   else if VG_USET_CLO(arg, "--show-leak-kinds",
5247                       MC_(parse_leak_kinds_tokens),
5248                       MC_(clo_show_leak_kinds)) {}
5249   else if VG_USET_CLO(arg, "--leak-check-heuristics",
5250                       MC_(parse_leak_heuristics_tokens),
5251                       MC_(clo_leak_check_heuristics)) {}
5252   else if (VG_BOOL_CLO(arg, "--show-reachable", tmp_show)) {
5253      if (tmp_show) {
5254         MC_(clo_show_leak_kinds) = MC_(all_Reachedness)();
5255      } else {
5256         MC_(clo_show_leak_kinds) &= ~R2S(Reachable);
5257      }
5258   }
5259   else if VG_BOOL_CLO(arg, "--show-possibly-lost", tmp_show) {
5260      if (tmp_show) {
5261         MC_(clo_show_leak_kinds) |= R2S(Possible);
5262      } else {
5263         MC_(clo_show_leak_kinds) &= ~R2S(Possible);
5264      }
5265   }
5266   else if VG_BOOL_CLO(arg, "--workaround-gcc296-bugs",
5267                                            MC_(clo_workaround_gcc296_bugs)) {}
5268
5269   else if VG_BINT_CLO(arg, "--freelist-vol",  MC_(clo_freelist_vol),
5270                                               0, 10*1000*1000*1000LL) {}
5271
5272   else if VG_BINT_CLO(arg, "--freelist-big-blocks",
5273                       MC_(clo_freelist_big_blocks),
5274                       0, 10*1000*1000*1000LL) {}
5275
5276   else if VG_XACT_CLO(arg, "--leak-check=no",
5277                            MC_(clo_leak_check), LC_Off) {}
5278   else if VG_XACT_CLO(arg, "--leak-check=summary",
5279                            MC_(clo_leak_check), LC_Summary) {}
5280   else if VG_XACT_CLO(arg, "--leak-check=yes",
5281                            MC_(clo_leak_check), LC_Full) {}
5282   else if VG_XACT_CLO(arg, "--leak-check=full",
5283                            MC_(clo_leak_check), LC_Full) {}
5284
5285   else if VG_XACT_CLO(arg, "--leak-resolution=low",
5286                            MC_(clo_leak_resolution), Vg_LowRes) {}
5287   else if VG_XACT_CLO(arg, "--leak-resolution=med",
5288                            MC_(clo_leak_resolution), Vg_MedRes) {}
5289   else if VG_XACT_CLO(arg, "--leak-resolution=high",
5290                            MC_(clo_leak_resolution), Vg_HighRes) {}
5291
5292   else if VG_STR_CLO(arg, "--ignore-ranges", tmp_str) {
5293      Bool ok = parse_ignore_ranges(tmp_str);
5294      if (!ok) {
5295         VG_(message)(Vg_DebugMsg,
5296            "ERROR: --ignore-ranges: "
5297            "invalid syntax, or end <= start in range\n");
5298         return False;
5299      }
5300      if (gIgnoredAddressRanges) {
5301         Word i;
5302         for (i = 0; i < VG_(sizeRangeMap)(gIgnoredAddressRanges); i++) {
5303            UWord val     = IAR_INVALID;
5304            UWord key_min = ~(UWord)0;
5305            UWord key_max = (UWord)0;
5306            VG_(indexRangeMap)( &key_min, &key_max, &val,
5307                                gIgnoredAddressRanges, i );
5308            tl_assert(key_min <= key_max);
5309            UWord limit = 0x4000000; /* 64M - entirely arbitrary limit */
5310            if (key_max - key_min > limit) {
5311               VG_(message)(Vg_DebugMsg,
5312                  "ERROR: --ignore-ranges: suspiciously large range:\n");
5313               VG_(message)(Vg_DebugMsg,
5314                   "       0x%lx-0x%lx (size %ld)\n", key_min, key_max,
5315                   key_max - key_min + 1);
5316               return False;
5317            }
5318         }
5319      }
5320   }
5321
5322   else if VG_BHEX_CLO(arg, "--malloc-fill", MC_(clo_malloc_fill), 0x00,0xFF) {}
5323   else if VG_BHEX_CLO(arg, "--free-fill",   MC_(clo_free_fill),   0x00,0xFF) {}
5324
5325   else if VG_XACT_CLO(arg, "--keep-stacktraces=alloc",
5326                       MC_(clo_keep_stacktraces), KS_alloc) {}
5327   else if VG_XACT_CLO(arg, "--keep-stacktraces=free",
5328                       MC_(clo_keep_stacktraces), KS_free) {}
5329   else if VG_XACT_CLO(arg, "--keep-stacktraces=alloc-and-free",
5330                       MC_(clo_keep_stacktraces), KS_alloc_and_free) {}
5331   else if VG_XACT_CLO(arg, "--keep-stacktraces=alloc-then-free",
5332                       MC_(clo_keep_stacktraces), KS_alloc_then_free) {}
5333   else if VG_XACT_CLO(arg, "--keep-stacktraces=none",
5334                       MC_(clo_keep_stacktraces), KS_none) {}
5335
5336   else if VG_BOOL_CLO(arg, "--show-mismatched-frees",
5337                       MC_(clo_show_mismatched_frees)) {}
5338
5339   else
5340      return VG_(replacement_malloc_process_cmd_line_option)(arg);
5341
5342   return True;
5343
5344
5345  bad_level:
5346   VG_(fmsg_bad_option)(arg,
5347      "--track-origins=yes has no effect when --undef-value-errors=no.\n");
5348}
5349
5350static void mc_print_usage(void)
5351{
5352   const HChar* plo_default = "no";
5353#  if defined(VGO_darwin)
5354   plo_default = "yes";
5355#  endif
5356
5357   VG_(printf)(
5358"    --leak-check=no|summary|full     search for memory leaks at exit?  [summary]\n"
5359"    --leak-resolution=low|med|high   differentiation of leak stack traces [high]\n"
5360"    --show-leak-kinds=kind1,kind2,.. which leak kinds to show?\n"
5361"                                            [definite,possible]\n"
5362"    --errors-for-leak-kinds=kind1,kind2,..  which leak kinds are errors?\n"
5363"                                            [definite,possible]\n"
5364"        where kind is one of:\n"
5365"          definite indirect possible reachable all none\n"
5366"    --leak-check-heuristics=heur1,heur2,... which heuristics to use for\n"
5367"        improving leak search false positive [none]\n"
5368"        where heur is one of:\n"
5369"          stdstring length64 newarray multipleinheritance all none\n"
5370"    --show-reachable=yes             same as --show-leak-kinds=all\n"
5371"    --show-reachable=no --show-possibly-lost=yes\n"
5372"                                     same as --show-leak-kinds=definite,possible\n"
5373"    --show-reachable=no --show-possibly-lost=no\n"
5374"                                     same as --show-leak-kinds=definite\n"
5375"    --undef-value-errors=no|yes      check for undefined value errors [yes]\n"
5376"    --track-origins=no|yes           show origins of undefined values? [no]\n"
5377"    --partial-loads-ok=no|yes        too hard to explain here; see manual [%s]\n"
5378"    --freelist-vol=<number>          volume of freed blocks queue     [20000000]\n"
5379"    --freelist-big-blocks=<number>   releases first blocks with size>= [1000000]\n"
5380"    --workaround-gcc296-bugs=no|yes  self explanatory [no]\n"
5381"    --ignore-ranges=0xPP-0xQQ[,0xRR-0xSS]   assume given addresses are OK\n"
5382"    --malloc-fill=<hexnumber>        fill malloc'd areas with given value\n"
5383"    --free-fill=<hexnumber>          fill free'd areas with given value\n"
5384"    --keep-stacktraces=alloc|free|alloc-and-free|alloc-then-free|none\n"
5385"        stack trace(s) to keep for malloc'd/free'd areas       [alloc-then-free]\n"
5386"    --show-mismatched-frees=no|yes   show frees that don't match the allocator? [yes]\n"
5387, plo_default
5388   );
5389}
5390
5391static void mc_print_debug_usage(void)
5392{
5393   VG_(printf)(
5394"    (none)\n"
5395   );
5396}
5397
5398
5399/*------------------------------------------------------------*/
5400/*--- Client blocks                                        ---*/
5401/*------------------------------------------------------------*/
5402
5403/* Client block management:
5404
5405   This is managed as an expanding array of client block descriptors.
5406   Indices of live descriptors are issued to the client, so it can ask
5407   to free them later.  Therefore we cannot slide live entries down
5408   over dead ones.  Instead we must use free/inuse flags and scan for
5409   an empty slot at allocation time.  This in turn means allocation is
5410   relatively expensive, so we hope this does not happen too often.
5411
5412   An unused block has start == size == 0
5413*/
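
/* Illustrative client-side usage (assumed; the exact macros live in
   memcheck.h): these descriptors back the VALGRIND_CREATE_BLOCK and
   VALGRIND_DISCARD client requests, roughly:

      int id = VALGRIND_CREATE_BLOCK(addr, len, "my buffer");
      ...
      VALGRIND_DISCARD(id);    // frees the descriptor slot for reuse

   The handle given back to the client is the index into cgbs[], which
   is another reason live entries cannot be compacted. */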
5414
5415/* type CGenBlock is defined in mc_include.h */
5416
5417/* This subsystem is self-initialising. */
5418static UWord      cgb_size = 0;
5419static UWord      cgb_used = 0;
5420static CGenBlock* cgbs     = NULL;
5421
5422/* Stats for this subsystem. */
5423static ULong cgb_used_MAX = 0;   /* Max in use. */
5424static ULong cgb_allocs   = 0;   /* Number of allocs. */
5425static ULong cgb_discards = 0;   /* Number of discards. */
5426static ULong cgb_search   = 0;   /* Number of searches. */
5427
5428
5429/* Get access to the client block array. */
5430void MC_(get_ClientBlock_array)( /*OUT*/CGenBlock** blocks,
5431                                 /*OUT*/UWord* nBlocks )
5432{
5433   *blocks  = cgbs;
5434   *nBlocks = cgb_used;
5435}
5436
5437
5438static
5439Int alloc_client_block ( void )
5440{
5441   UWord      i, sz_new;
5442   CGenBlock* cgbs_new;
5443
5444   cgb_allocs++;
5445
5446   for (i = 0; i < cgb_used; i++) {
5447      cgb_search++;
5448      if (cgbs[i].start == 0 && cgbs[i].size == 0)
5449         return i;
5450   }
5451
5452   /* Not found.  Try to allocate one at the end. */
5453   if (cgb_used < cgb_size) {
5454      cgb_used++;
5455      return cgb_used-1;
5456   }
5457
5458   /* Ok, we have to allocate a new one. */
5459   tl_assert(cgb_used == cgb_size);
5460   sz_new = (cgbs == NULL) ? 10 : (2 * cgb_size);
5461
5462   cgbs_new = VG_(malloc)( "mc.acb.1", sz_new * sizeof(CGenBlock) );
5463   for (i = 0; i < cgb_used; i++)
5464      cgbs_new[i] = cgbs[i];
5465
5466   if (cgbs != NULL)
5467      VG_(free)( cgbs );
5468   cgbs = cgbs_new;
5469
5470   cgb_size = sz_new;
5471   cgb_used++;
5472   if (cgb_used > cgb_used_MAX)
5473      cgb_used_MAX = cgb_used;
5474   return cgb_used-1;
5475}
5476
5477
5478static void show_client_block_stats ( void )
5479{
5480   VG_(message)(Vg_DebugMsg,
5481      "general CBs: %llu allocs, %llu discards, %llu maxinuse, %llu search\n",
5482      cgb_allocs, cgb_discards, cgb_used_MAX, cgb_search
5483   );
5484}
5485static void print_monitor_help ( void )
5486{
5487   VG_(gdb_printf)
5488      (
5489"\n"
5490"memcheck monitor commands:\n"
5491"  get_vbits <addr> [<len>]\n"
5492"        returns validity bits for <len> (or 1) bytes at <addr>\n"
5493"            bit values 0 = valid, 1 = invalid, __ = unaddressable byte\n"
5494"        Example: get_vbits 0x8049c78 10\n"
5495"  make_memory [noaccess|undefined\n"
5496"                     |defined|Definedifaddressable] <addr> [<len>]\n"
5497"        mark <len> (or 1) bytes at <addr> with the given accessibility\n"
5498"  check_memory [addressable|defined] <addr> [<len>]\n"
5499"        check that <len> (or 1) bytes at <addr> have the given accessibility\n"
5500"            and outputs a description of <addr>\n"
5501"  leak_check [full*|summary]\n"
5502"                [kinds kind1,kind2,...|reachable|possibleleak*|definiteleak]\n"
5503"                [heuristics heur1,heur2,...]\n"
5504"                [increased*|changed|any]\n"
5505"                [unlimited*|limited <max_loss_records_output>]\n"
5506"            * = defaults\n"
5507"       where kind is one of:\n"
5508"         definite indirect possible reachable all none\n"
5509"       where heur is one of:\n"
5510"         stdstring length64 newarray multipleinheritance all none*\n"
5511"       Examples: leak_check\n"
5512"                 leak_check summary any\n"
5513"                 leak_check full kinds indirect,possible\n"
5514"                 leak_check full reachable any limited 100\n"
5515"  block_list <loss_record_nr>\n"
5516"        after a leak search, shows the list of blocks of <loss_record_nr>\n"
5517"  who_points_at <addr> [<len>]\n"
5518"        shows places pointing inside <len> (default 1) bytes at <addr>\n"
5519"        (with len 1, only shows \"start pointers\" pointing exactly to <addr>,\n"
5520"         with len > 1, will also show \"interior pointers\")\n"
5521"\n");
5522}
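
/* Example session (hedged; addresses and output depend on the program being
   debugged).  With the program running under Valgrind and gdb attached
   through vgdb, the commands above are issued via gdb's "monitor" prefix:

      (gdb) monitor get_vbits 0x8049c78 10
      (gdb) monitor make_memory defined 0x8049c78 10
      (gdb) monitor leak_check full kinds definite,indirect
      (gdb) monitor block_list 3
      (gdb) monitor who_points_at 0x8049c78 8

   The same commands can also be sent from a shell with "vgdb <command>"
   while the program is running. */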
5523
5524/* return True if request recognised, False otherwise */
5525static Bool handle_gdb_monitor_command (ThreadId tid, HChar *req)
5526{
5527   HChar* wcmd;
5528   HChar s[VG_(strlen(req)) + 1]; /* copy for strtok_r */
5529   HChar *ssaveptr;
5530
5531   VG_(strcpy) (s, req);
5532
5533   wcmd = VG_(strtok_r) (s, " ", &ssaveptr);
5534   /* NB: if possible, avoid introducing a new command below which
5535      starts with the same first letter(s) as an already existing
5536      command. This ensures a shorter abbreviation for the user. */
5537   switch (VG_(keyword_id)
5538           ("help get_vbits leak_check make_memory check_memory "
5539            "block_list who_points_at",
5540            wcmd, kwd_report_duplicated_matches)) {
5541   case -2: /* multiple matches */
5542      return True;
5543   case -1: /* not found */
5544      return False;
5545   case  0: /* help */
5546      print_monitor_help();
5547      return True;
5548   case  1: { /* get_vbits */
5549      Addr address;
5550      SizeT szB = 1;
5551      if (VG_(strtok_get_address_and_size) (&address, &szB, &ssaveptr)) {
5552         UChar vbits;
5553         Int i;
5554         Int unaddressable = 0;
5555         for (i = 0; i < szB; i++) {
5556            Int res = mc_get_or_set_vbits_for_client
5557               (address+i, (Addr) &vbits, 1,
5558                False, /* get them */
5559                False  /* is client request */ );
5560            /* we are at the start of a new output line, so print a \n first. */
5561            if ((i % 32) == 0 && i != 0)
5562               VG_(printf) ("\n");
5563            /* we are at the start of the next group of 4, so print a space. */
5564            else if ((i % 4) == 0 && i != 0)
5565               VG_(printf) (" ");
5566            if (res == 1) {
5567               VG_(printf) ("%02x", vbits);
5568            } else {
5569               tl_assert(3 == res);
5570               unaddressable++;
5571               VG_(printf) ("__");
5572            }
5573         }
5574         VG_(printf) ("\n");
5575         if (unaddressable) {
5576            VG_(printf)
5577               ("Address %p len %ld has %d bytes unaddressable\n",
5578                (void *)address, szB, unaddressable);
5579         }
5580      }
5581      return True;
5582   }
5583   case  2: { /* leak_check */
5584      Int err = 0;
5585      LeakCheckParams lcp;
5586      HChar* kw;
5587
5588      lcp.mode               = LC_Full;
5589      lcp.show_leak_kinds    = R2S(Possible) | R2S(Unreached);
5590      lcp.errors_for_leak_kinds = 0; // no errors for interactive leak search.
5591      lcp.heuristics         = 0;
5592      lcp.deltamode          = LCD_Increased;
5593      lcp.max_loss_records_output = 999999999;
5594      lcp.requested_by_monitor_command = True;
5595
5596      for (kw = VG_(strtok_r) (NULL, " ", &ssaveptr);
5597           kw != NULL;
5598           kw = VG_(strtok_r) (NULL, " ", &ssaveptr)) {
5599         switch (VG_(keyword_id)
5600                 ("full summary "
5601                  "kinds reachable possibleleak definiteleak "
5602                  "heuristics "
5603                  "increased changed any "
5604                  "unlimited limited ",
5605                  kw, kwd_report_all)) {
5606         case -2: err++; break;
5607         case -1: err++; break;
5608         case  0: /* full */
5609            lcp.mode = LC_Full; break;
5610         case  1: /* summary */
5611            lcp.mode = LC_Summary; break;
5612         case  2: { /* kinds */
5613            wcmd = VG_(strtok_r) (NULL, " ", &ssaveptr);
5614            if (wcmd == NULL
5615                || !VG_(parse_enum_set)(MC_(parse_leak_kinds_tokens),
5616                                        True/*allow_all*/,
5617                                        wcmd,
5618                                        &lcp.show_leak_kinds)) {
5619               VG_(gdb_printf) ("missing or malformed leak kinds set\n");
5620               err++;
5621            }
5622            break;
5623         }
5624         case  3: /* reachable */
5625            lcp.show_leak_kinds = MC_(all_Reachedness)();
5626            break;
5627         case  4: /* possibleleak */
5628            lcp.show_leak_kinds
5629               = R2S(Possible) | R2S(IndirectLeak) | R2S(Unreached);
5630            break;
5631         case  5: /* definiteleak */
5632            lcp.show_leak_kinds = R2S(Unreached);
5633            break;
5634         case  6: { /* heuristics */
5635            wcmd = VG_(strtok_r) (NULL, " ", &ssaveptr);
5636            if (wcmd == NULL
5637                || !VG_(parse_enum_set)(MC_(parse_leak_heuristics_tokens),
5638                                        True/*allow_all*/,
5639                                        wcmd,
5640                                        &lcp.heuristics)) {
5641               VG_(gdb_printf) ("missing or malformed heuristics set\n");
5642               err++;
5643            }
5644            break;
5645         }
5646         case  7: /* increased */
5647            lcp.deltamode = LCD_Increased; break;
5648         case  8: /* changed */
5649            lcp.deltamode = LCD_Changed; break;
5650         case  9: /* any */
5651            lcp.deltamode = LCD_Any; break;
5652         case 10: /* unlimited */
5653            lcp.max_loss_records_output = 999999999; break;
5654         case 11: { /* limited */
5655            Int int_value;
5656            const HChar* endptr;
5657
5658            wcmd = VG_(strtok_r) (NULL, " ", &ssaveptr);
5659            if (wcmd == NULL) {
5660               int_value = 0;
5661               endptr = "empty"; /* to report an error below */
5662            } else {
5663               HChar *the_end;
5664               int_value = VG_(strtoll10) (wcmd, &the_end);
5665               endptr = the_end;
5666            }
5667            if (*endptr != '\0')
5668               VG_(gdb_printf) ("missing or malformed integer value\n");
5669            else if (int_value > 0)
5670               lcp.max_loss_records_output = (UInt) int_value;
5671            else
5672               VG_(gdb_printf) ("max_loss_records_output must be >= 1, got %d\n",
5673                                int_value);
5674            break;
5675         }
5676         default:
5677            tl_assert (0);
5678         }
5679      }
5680      if (!err)
5681         MC_(detect_memory_leaks)(tid, &lcp);
5682      return True;
5683   }
5684
5685   case  3: { /* make_memory */
5686      Addr address;
5687      SizeT szB = 1;
5688      Int kwdid = VG_(keyword_id)
5689         ("noaccess undefined defined Definedifaddressable",
5690          VG_(strtok_r) (NULL, " ", &ssaveptr), kwd_report_all);
5691      if (!VG_(strtok_get_address_and_size) (&address, &szB, &ssaveptr))
5692         return True;
5693      switch (kwdid) {
5694      case -2: break;
5695      case -1: break;
5696      case  0: MC_(make_mem_noaccess) (address, szB); break;
5697      case  1: make_mem_undefined_w_tid_and_okind ( address, szB, tid,
5698                                                    MC_OKIND_USER ); break;
5699      case  2: MC_(make_mem_defined) ( address, szB ); break;
5700      case  3: make_mem_defined_if_addressable ( address, szB ); break;
5701      default: tl_assert(0);
5702      }
5703      return True;
5704   }
5705
5706   case  4: { /* check_memory */
5707      Addr address;
5708      SizeT szB = 1;
5709      Addr bad_addr;
5710      UInt okind;
5711      const HChar* src;
5712      UInt otag;
5713      UInt ecu;
5714      ExeContext* origin_ec;
5715      MC_ReadResult res;
5716
5717      Int kwdid = VG_(keyword_id)
5718         ("addressable defined",
5719          VG_(strtok_r) (NULL, " ", &ssaveptr), kwd_report_all);
5720      if (!VG_(strtok_get_address_and_size) (&address, &szB, &ssaveptr))
5721         return True;
5722      switch (kwdid) {
5723      case -2: break;
5724      case -1: break;
5725      case  0: /* addressable */
5726         if (is_mem_addressable ( address, szB, &bad_addr ))
5727            VG_(printf) ("Address %p len %ld addressable\n",
5728                             (void *)address, szB);
5729         else
5730            VG_(printf)
5731               ("Address %p len %ld not addressable:\nbad address %p\n",
5732                (void *)address, szB, (void *) bad_addr);
5733         MC_(pp_describe_addr) (address);
5734         break;
5735      case  1: /* defined */
5736         res = is_mem_defined ( address, szB, &bad_addr, &otag );
5737         if (MC_AddrErr == res)
5738            VG_(printf)
5739               ("Address %p len %ld not addressable:\nbad address %p\n",
5740                (void *)address, szB, (void *) bad_addr);
5741         else if (MC_ValueErr == res) {
5742            okind = otag & 3;
5743            switch (okind) {
5744            case MC_OKIND_STACK:
5745               src = " was created by a stack allocation"; break;
5746            case MC_OKIND_HEAP:
5747               src = " was created by a heap allocation"; break;
5748            case MC_OKIND_USER:
5749               src = " was created by a client request"; break;
5750            case MC_OKIND_UNKNOWN:
5751               src = ""; break;
5752            default: tl_assert(0);
5753            }
5754            VG_(printf)
5755               ("Address %p len %ld not defined:\n"
5756                "Uninitialised value at %p%s\n",
5757                (void *)address, szB, (void *) bad_addr, src);
5758            ecu = otag & ~3;
5759            if (VG_(is_plausible_ECU)(ecu)) {
5760               origin_ec = VG_(get_ExeContext_from_ECU)( ecu );
5761               VG_(pp_ExeContext)( origin_ec );
5762            }
5763         }
5764         else
5765            VG_(printf) ("Address %p len %ld defined\n",
5766                         (void *)address, szB);
5767         MC_(pp_describe_addr) (address);
5768         break;
5769      default: tl_assert(0);
5770      }
5771      return True;
5772   }
5773
5774   case  5: { /* block_list */
5775      HChar* wl;
5776      HChar *endptr;
5777      UInt lr_nr = 0;
5778      wl = VG_(strtok_r) (NULL, " ", &ssaveptr);
5779      if (wl != NULL)
5780         lr_nr = VG_(strtoull10) (wl, &endptr);
5781      if (wl == NULL || *endptr != '\0') {
5782         VG_(gdb_printf) ("malformed or missing integer\n");
5783      } else {
5784         // Use lr_nr-1: the number shown to the user is 1 more than the index in lr_array.
5785         if (lr_nr == 0 || ! MC_(print_block_list) (lr_nr-1))
5786            VG_(gdb_printf) ("invalid loss record nr\n");
5787      }
5788      return True;
5789   }
5790
5791   case  6: { /* who_points_at */
5792      Addr address;
5793      SizeT szB = 1;
5794
5795      if (!VG_(strtok_get_address_and_size) (&address, &szB, &ssaveptr))
5796         return True;
5797      if (address == (Addr) 0) {
5798         VG_(gdb_printf) ("Cannot search who points at 0x0\n");
5799         return True;
5800      }
5801      MC_(who_points_at) (address, szB);
5802      return True;
5803   }
5804
5805   default:
5806      tl_assert(0);
5807      return False;
5808   }
5809}
5810
5811/*------------------------------------------------------------*/
5812/*--- Client requests                                      ---*/
5813/*------------------------------------------------------------*/
5814
5815static Bool mc_handle_client_request ( ThreadId tid, UWord* arg, UWord* ret )
5816{
5817   Int   i;
5818   Addr  bad_addr;
5819
5820   if (!VG_IS_TOOL_USERREQ('M','C',arg[0])
5821       && VG_USERREQ__MALLOCLIKE_BLOCK != arg[0]
5822       && VG_USERREQ__RESIZEINPLACE_BLOCK != arg[0]
5823       && VG_USERREQ__FREELIKE_BLOCK   != arg[0]
5824       && VG_USERREQ__CREATE_MEMPOOL   != arg[0]
5825       && VG_USERREQ__DESTROY_MEMPOOL  != arg[0]
5826       && VG_USERREQ__MEMPOOL_ALLOC    != arg[0]
5827       && VG_USERREQ__MEMPOOL_FREE     != arg[0]
5828       && VG_USERREQ__MEMPOOL_TRIM     != arg[0]
5829       && VG_USERREQ__MOVE_MEMPOOL     != arg[0]
5830       && VG_USERREQ__MEMPOOL_CHANGE   != arg[0]
5831       && VG_USERREQ__MEMPOOL_EXISTS   != arg[0]
5832       && VG_USERREQ__GDB_MONITOR_COMMAND   != arg[0]
5833       && VG_USERREQ__ENABLE_ADDR_ERROR_REPORTING_IN_RANGE != arg[0]
5834       && VG_USERREQ__DISABLE_ADDR_ERROR_REPORTING_IN_RANGE != arg[0])
5835      return False;
5836
5837   switch (arg[0]) {
5838      case VG_USERREQ__CHECK_MEM_IS_ADDRESSABLE: {
5839         Bool ok = is_mem_addressable ( arg[1], arg[2], &bad_addr );
5840         if (!ok)
5841            MC_(record_user_error) ( tid, bad_addr, /*isAddrErr*/True, 0 );
5842         *ret = ok ? (UWord)NULL : bad_addr;
5843         break;
5844      }
5845
5846      case VG_USERREQ__CHECK_MEM_IS_DEFINED: {
5847         Bool errorV    = False;
5848         Addr bad_addrV = 0;
5849         UInt otagV     = 0;
5850         Bool errorA    = False;
5851/* Mark the given addr/len unaddressable for the watchpoint implementation.
5852   The PointKind will be handled at access time. */
5853            arg[1], arg[2],
5854            &errorV, &bad_addrV, &otagV, &errorA, &bad_addrA
5855         );
5856   /* GDBTD: this is somewhat fishy.  Ideally we would save the previous
5857      accessibility and definedness in gdbserver so that they could be
5858      restored properly.  Currently we assume that the user only watches
5859      things which are properly addressable and defined. */
5860         if (errorA) {
5861            MC_(record_user_error) ( tid, bad_addrA,
5862                                     /*isAddrErr*/True, 0 );
5863         }
5864         /* Return the lower of the two erring addresses, if any. */
5865         *ret = 0;
5866         if (errorV && !errorA) {
5867            *ret = bad_addrV;
5868         }
5869         if (!errorV && errorA) {
5870            *ret = bad_addrA;
5871         }
5872         if (errorV && errorA) {
5873            *ret = bad_addrV < bad_addrA ? bad_addrV : bad_addrA;
5874         }
5875         break;
5876      }
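
      /* Client-side view (an illustrative sketch, assuming memcheck.h is on
         the include path): the two requests above are normally issued via
         the macros

            VALGRIND_CHECK_MEM_IS_ADDRESSABLE(p, len);
            VALGRIND_CHECK_MEM_IS_DEFINED(p, len);

         which return 0 on success or the first offending address, as
         computed above; any problem found is also reported through the
         usual error machinery via MC_(record_user_error). */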
5877
5878      case VG_USERREQ__DO_LEAK_CHECK: {
5879         LeakCheckParams lcp;
5880
5881         if (arg[1] == 0)
5882            lcp.mode = LC_Full;
5883         else if (arg[1] == 1)
5884            lcp.mode = LC_Summary;
5885         else {
5886            VG_(message)(Vg_UserMsg,
5887                         "Warning: unknown memcheck leak search mode\n");
5888            lcp.mode = LC_Full;
5889         }
5890
5891         lcp.show_leak_kinds = MC_(clo_show_leak_kinds);
5892         lcp.errors_for_leak_kinds = MC_(clo_error_for_leak_kinds);
5893         lcp.heuristics = MC_(clo_leak_check_heuristics);
5894
5895         if (arg[2] == 0)
5896            lcp.deltamode = LCD_Any;
5897         else if (arg[2] == 1)
5898            lcp.deltamode = LCD_Increased;
5899         else if (arg[2] == 2)
5900            lcp.deltamode = LCD_Changed;
5901         else {
5902            VG_(message)
5903               (Vg_UserMsg,
5904                "Warning: unknown memcheck leak search deltamode\n");
5905            lcp.deltamode = LCD_Any;
5906         }
5907         lcp.max_loss_records_output = 999999999;
5908         lcp.requested_by_monitor_command = False;
5909
5910         MC_(detect_memory_leaks)(tid, &lcp);
5911         *ret = 0; /* return value is meaningless */
5912         break;
5913      }
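
      /* Client-side view (a hedged sketch): arg[1] selects full (0) vs
         summary (1) and arg[2] selects the delta mode, as decoded above.
         The memcheck.h macros fill these in, roughly:

            VALGRIND_DO_LEAK_CHECK          // arg[1]=0 arg[2]=0: full, any
            VALGRIND_DO_ADDED_LEAK_CHECK    // arg[1]=0 arg[2]=1: full, increased
            VALGRIND_DO_CHANGED_LEAK_CHECK  // arg[1]=0 arg[2]=2: full, changed
            VALGRIND_DO_QUICK_LEAK_CHECK    // arg[1]=1 arg[2]=0: summary, any
      */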
5914
5915      case VG_USERREQ__MAKE_MEM_NOACCESS:
5916         MC_(make_mem_noaccess) ( arg[1], arg[2] );
5917         *ret = -1;
5918         break;
5919
5920      case VG_USERREQ__MAKE_MEM_UNDEFINED:
5921         make_mem_undefined_w_tid_and_okind ( arg[1], arg[2], tid,
5922                                              MC_OKIND_USER );
5923         *ret = -1;
5924         break;
5925
5926      case VG_USERREQ__MAKE_MEM_DEFINED:
5927         MC_(make_mem_defined) ( arg[1], arg[2] );
5928         *ret = -1;
5929         break;
5930
5931      case VG_USERREQ__MAKE_MEM_DEFINED_IF_ADDRESSABLE:
5932         make_mem_defined_if_addressable ( arg[1], arg[2] );
5933         *ret = -1;
5934         break;
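
      /* Client-side view (illustrative): the four requests above correspond
         to the memcheck.h macros

            VALGRIND_MAKE_MEM_NOACCESS(addr, len);
            VALGRIND_MAKE_MEM_UNDEFINED(addr, len);
            VALGRIND_MAKE_MEM_DEFINED(addr, len);
            VALGRIND_MAKE_MEM_DEFINED_IF_ADDRESSABLE(addr, len);
      */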
5935
5936      case VG_USERREQ__CREATE_BLOCK: /* describe a block */
5937         if (arg[1] != 0 && arg[2] != 0) {
5938            i = alloc_client_block();
5939            /* VG_(printf)("allocated %d %p\n", i, cgbs); */
5940            cgbs[i].start = arg[1];
5941            cgbs[i].size  = arg[2];
5942            cgbs[i].desc  = VG_(strdup)("mc.mhcr.1", (HChar *)arg[3]);
5943            cgbs[i].where = VG_(record_ExeContext) ( tid, 0/*first_ip_delta*/ );
5944            *ret = i;
5945         } else
5946            *ret = -1;
5947         break;
5948
5949      case VG_USERREQ__DISCARD: /* discard */
5950         if (cgbs == NULL
5951             || arg[2] >= cgb_used ||
5952             (cgbs[arg[2]].start == 0 && cgbs[arg[2]].size == 0)) {
5953            *ret = 1;
5954         } else {
5955            tl_assert(arg[2] >= 0 && arg[2] < cgb_used);
5956            cgbs[arg[2]].start = cgbs[arg[2]].size = 0;
5957            VG_(free)(cgbs[arg[2]].desc);
5958            cgb_discards++;
5959            *ret = 0;
5960         }
5961         break;
5962
5963      case VG_USERREQ__GET_VBITS:
5964         *ret = mc_get_or_set_vbits_for_client
5965                   ( arg[1], arg[2], arg[3],
5966                     False /* get them */,
5967                     True /* is client request */ );
5968         break;
5969
5970      case VG_USERREQ__SET_VBITS:
5971         *ret = mc_get_or_set_vbits_for_client
5972                   ( arg[1], arg[2], arg[3],
5973                     True /* set them */,
5974                     True /* is client request */ );
5975         break;
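
      /* Client-side view (a hedged sketch): these correspond to the
         memcheck.h macros

            VALGRIND_GET_VBITS(addr, vbits_buf, nbytes);
            VALGRIND_SET_VBITS(addr, vbits_buf, nbytes);

         where vbits_buf holds one V-bit byte per data byte; the return
         value comes from mc_get_or_set_vbits_for_client. */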
5976
5977      case VG_USERREQ__COUNT_LEAKS: { /* count leaked bytes */
5978         UWord** argp = (UWord**)arg;
5979         // MC_(bytes_leaked) et al were set by the last leak check (or zero
5980         // if no prior leak checks performed).
5981         *argp[1] = MC_(bytes_leaked) + MC_(bytes_indirect);
5982         *argp[2] = MC_(bytes_dubious);
5983         *argp[3] = MC_(bytes_reachable);
5984         *argp[4] = MC_(bytes_suppressed);
5985         // there is no argp[5]
5986         //*argp[5] = MC_(bytes_indirect);
5987         // XXX need to make *argp[1-4] defined;  currently done in the
5988         // VALGRIND_COUNT_LEAKS_MACRO by initialising them to zero.
5989         *ret = 0;
5990         return True;
5991      }
5992      case VG_USERREQ__COUNT_LEAK_BLOCKS: { /* count leaked blocks */
5993         UWord** argp = (UWord**)arg;
5994         // MC_(blocks_leaked) et al were set by the last leak check (or zero
5995         // if no prior leak checks performed).
5996         *argp[1] = MC_(blocks_leaked) + MC_(blocks_indirect);
5997         *argp[2] = MC_(blocks_dubious);
5998         *argp[3] = MC_(blocks_reachable);
5999         *argp[4] = MC_(blocks_suppressed);
6000         // there is no argp[5]
6001         //*argp[5] = MC_(blocks_indirect);
6002         // XXX need to make *argp[1-4] defined;  currently done in the
6003         // VALGRIND_COUNT_LEAK_BLOCKS_MACRO by initialising them to zero.
6004         *ret = 0;
6005         return True;
6006      }
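
      /* Client-side view (a hedged sketch, assuming memcheck.h is on the
         include path): both requests pass the addresses of four
         caller-supplied words, which the macros initialise to zero (see the
         XXX comments above):

            unsigned long leaked, dubious, reachable, suppressed;
            VALGRIND_DO_LEAK_CHECK;
            VALGRIND_COUNT_LEAKS(leaked, dubious, reachable, suppressed);
            VALGRIND_COUNT_LEAK_BLOCKS(leaked, dubious, reachable, suppressed);
      */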
6007      case VG_USERREQ__MALLOCLIKE_BLOCK: {
6008         Addr p         = (Addr)arg[1];
6009         SizeT sizeB    =       arg[2];
6010         UInt rzB       =       arg[3];
6011         Bool is_zeroed = (Bool)arg[4];
6012
6013         MC_(new_block) ( tid, p, sizeB, /*ignored*/0, is_zeroed,
6014                          MC_AllocCustom, MC_(malloc_list) );
6015         if (rzB > 0) {
6016            MC_(make_mem_noaccess) ( p - rzB, rzB);
6017            MC_(make_mem_noaccess) ( p + sizeB, rzB);
6018         }
6019         return True;
6020      }
6021      case VG_USERREQ__RESIZEINPLACE_BLOCK: {
6022         Addr p         = (Addr)arg[1];
6023         SizeT oldSizeB =       arg[2];
6024         SizeT newSizeB =       arg[3];
6025         UInt rzB       =       arg[4];
6026
6027         MC_(handle_resizeInPlace) ( tid, p, oldSizeB, newSizeB, rzB );
6028         return True;
6029      }
6030      case VG_USERREQ__FREELIKE_BLOCK: {
6031         Addr p         = (Addr)arg[1];
6032         UInt rzB       =       arg[2];
6033
6034         MC_(handle_free) ( tid, p, rzB, MC_AllocCustom );
6035         return True;
6036      }
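
      /* Client-side view (an illustrative sketch for a custom allocator;
         the helper names are hypothetical).  Assuming valgrind.h is on the
         include path:

            #include <valgrind/valgrind.h>

            void* my_alloc ( size_t n )
            {
               void* p = my_get_raw_memory(n);      // hypothetical
               VALGRIND_MALLOCLIKE_BLOCK(p, n, /*rzB*/0, /*is_zeroed*/0);
               return p;
            }

            void my_free ( void* p )
            {
               VALGRIND_FREELIKE_BLOCK(p, /*rzB*/0);
               my_release_raw_memory(p);            // hypothetical
            }

         VALGRIND_RESIZEINPLACE_BLOCK covers in-place growth/shrinkage. */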
6037
6038      case _VG_USERREQ__MEMCHECK_RECORD_OVERLAP_ERROR: {
6039         HChar* s  = (HChar*)arg[1];
6040         Addr  dst = (Addr) arg[2];
6041         Addr  src = (Addr) arg[3];
6042         SizeT len = (SizeT)arg[4];
6043         MC_(record_overlap_error)(tid, s, src, dst, len);
6044         return True;
6045      }
6046
6047      case VG_USERREQ__CREATE_MEMPOOL: {
6048         Addr pool      = (Addr)arg[1];
6049         UInt rzB       =       arg[2];
6050         Bool is_zeroed = (Bool)arg[3];
6051
6052         MC_(create_mempool) ( pool, rzB, is_zeroed );
6053         return True;
6054      }
6055
6056      case VG_USERREQ__DESTROY_MEMPOOL: {
6057         Addr pool      = (Addr)arg[1];
6058
6059         MC_(destroy_mempool) ( pool );
6060         return True;
6061      }
6062
6063      case VG_USERREQ__MEMPOOL_ALLOC: {
6064         Addr pool      = (Addr)arg[1];
6065         Addr addr      = (Addr)arg[2];
6066         UInt size      =       arg[3];
6067
6068         MC_(mempool_alloc) ( tid, pool, addr, size );
6069         return True;
6070      }
6071
6072      case VG_USERREQ__MEMPOOL_FREE: {
6073         Addr pool      = (Addr)arg[1];
6074         Addr addr      = (Addr)arg[2];
6075
6076         MC_(mempool_free) ( pool, addr );
6077         return True;
6078      }
6079
6080      case VG_USERREQ__MEMPOOL_TRIM: {
6081         Addr pool      = (Addr)arg[1];
6082         Addr addr      = (Addr)arg[2];
6083         UInt size      =       arg[3];
6084
6085         MC_(mempool_trim) ( pool, addr, size );
6086         return True;
6087      }
6088
6089      case VG_USERREQ__MOVE_MEMPOOL: {
6090         Addr poolA     = (Addr)arg[1];
6091         Addr poolB     = (Addr)arg[2];
6092
6093         MC_(move_mempool) ( poolA, poolB );
6094         return True;
6095      }
6096
6097      case VG_USERREQ__MEMPOOL_CHANGE: {
6098         Addr pool      = (Addr)arg[1];
6099         Addr addrA     = (Addr)arg[2];
6100         Addr addrB     = (Addr)arg[3];
6101         UInt size      =       arg[4];
6102
6103         MC_(mempool_change) ( pool, addrA, addrB, size );
6104         return True;
6105      }
6106
6107      case VG_USERREQ__MEMPOOL_EXISTS: {
6108         Addr pool      = (Addr)arg[1];
6109
6110         *ret = (UWord) MC_(mempool_exists) ( pool );
6111         return True;
6112      }
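
      /* Client-side view (a hedged sketch): a pool allocator would typically
         issue, via valgrind.h,

            VALGRIND_CREATE_MEMPOOL(pool, /*rzB*/0, /*is_zeroed*/0);
            VALGRIND_MEMPOOL_ALLOC(pool, chunk, chunk_size);
            VALGRIND_MEMPOOL_FREE(pool, chunk);
            VALGRIND_DESTROY_MEMPOOL(pool);

         where "pool" is any address the client uses to identify the pool
         (often its own pool header), handled here by the
         MC_(create_mempool) etc. calls above. */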
6113
6114      case VG_USERREQ__GDB_MONITOR_COMMAND: {
6115         Bool handled = handle_gdb_monitor_command (tid, (HChar*)arg[1]);
6116         if (handled)
6117            *ret = 1;
6118         else
6119            *ret = 0;
6120         return handled;
6121      }
6122
6123      case VG_USERREQ__DISABLE_ADDR_ERROR_REPORTING_IN_RANGE:
6124      case VG_USERREQ__ENABLE_ADDR_ERROR_REPORTING_IN_RANGE: {
6125         Bool addRange
6126            = arg[0] == VG_USERREQ__DISABLE_ADDR_ERROR_REPORTING_IN_RANGE;
6127         Bool ok
6128            = modify_ignore_ranges(addRange, arg[1], arg[2]);
6129         *ret = ok ? 1 : 0;
6130         return True;
6131      }
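
      /* Client-side view (illustrative): these two requests back the
         range-based error-disablement macros in memcheck.h, e.g.

            VALGRIND_DISABLE_ADDR_ERROR_REPORTING_IN_RANGE(addr, len);
            // ... accesses in [addr, addr+len) report no addressing errors
            VALGRIND_ENABLE_ADDR_ERROR_REPORTING_IN_RANGE(addr, len);

         Ranges still disabled at exit are warned about in mc_fini. */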
6132
6133      default:
6134         VG_(message)(
6135            Vg_UserMsg,
6136            "Warning: unknown memcheck client request code %llx\n",
6137            (ULong)arg[0]
6138         );
6139         return False;
6140   }
6141   return True;
6142}
6143
6144
6145/*------------------------------------------------------------*/
6146/*--- Crude profiling machinery.                           ---*/
6147/*------------------------------------------------------------*/
6148
6149// We track a number of interesting events (using PROF_EVENT)
6150// if MC_PROFILE_MEMORY is defined.
6151
6152#ifdef MC_PROFILE_MEMORY
6153
6154UInt   MC_(event_ctr)[N_PROF_EVENTS];
6155HChar* MC_(event_ctr_name)[N_PROF_EVENTS];
6156
6157static void init_prof_mem ( void )
6158{
6159   Int i;
6160   for (i = 0; i < N_PROF_EVENTS; i++) {
6161      MC_(event_ctr)[i] = 0;
6162      MC_(event_ctr_name)[i] = NULL;
6163   }
6164}
6165
6166static void done_prof_mem ( void )
6167{
6168   Int  i;
6169   Bool spaced = False;
6170   for (i = 0; i < N_PROF_EVENTS; i++) {
6171      if (!spaced && (i % 10) == 0) {
6172         VG_(printf)("\n");
6173         spaced = True;
6174      }
6175      if (MC_(event_ctr)[i] > 0) {
6176         spaced = False;
6177         VG_(printf)( "prof mem event %3d: %9d   %s\n",
6178                      i, MC_(event_ctr)[i],
6179                      MC_(event_ctr_name)[i]
6180                         ? MC_(event_ctr_name)[i] : "unnamed");
6181      }
6182   }
6183}
6184
6185#else
6186
6187static void init_prof_mem ( void ) { }
6188static void done_prof_mem ( void ) { }
6189
6190#endif
6191
6192
6193/*------------------------------------------------------------*/
6194/*--- Origin tracking stuff                                ---*/
6195/*------------------------------------------------------------*/
6196
6197/*--------------------------------------------*/
6198/*--- Origin tracking: load handlers       ---*/
6199/*--------------------------------------------*/
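
/* A note on the scheme used below, summarised from the code: each 32-bit
   word of an OCacheLine carries at most one origin tag, in w32[lineoff],
   plus a 4-bit mask, descr[lineoff], saying which of that word's four bytes
   the tag actually describes.  A load returns 0 (no origin) if none of the
   bytes it touches is described, and otherwise returns the word's tag.
   Misaligned and wider-than-word accesses are split into aligned pieces
   whose results are combined with merge_origins, which simply keeps the
   larger of the two tags. */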
6200
6201static INLINE UInt merge_origins ( UInt or1, UInt or2 ) {
6202   return or1 > or2 ? or1 : or2;
6203}
6204
6205UWord VG_REGPARM(1) MC_(helperc_b_load1)( Addr a ) {
6206   OCacheLine* line;
6207   UChar descr;
6208   UWord lineoff = oc_line_offset(a);
6209   UWord byteoff = a & 3; /* 0, 1, 2 or 3 */
6210
6211   if (OC_ENABLE_ASSERTIONS) {
6212      tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE);
6213   }
6214
6215   line = find_OCacheLine( a );
6216
6217   descr = line->descr[lineoff];
6218   if (OC_ENABLE_ASSERTIONS) {
6219      tl_assert(descr < 0x10);
6220   }
6221
6222   if (LIKELY(0 == (descr & (1 << byteoff))))  {
6223      return 0;
6224   } else {
6225      return line->w32[lineoff];
6226   }
6227}
6228
6229UWord VG_REGPARM(1) MC_(helperc_b_load2)( Addr a ) {
6230   OCacheLine* line;
6231   UChar descr;
6232   UWord lineoff, byteoff;
6233
6234   if (UNLIKELY(a & 1)) {
6235      /* Handle misaligned case, slowly. */
6236      UInt oLo   = (UInt)MC_(helperc_b_load1)( a + 0 );
6237      UInt oHi   = (UInt)MC_(helperc_b_load1)( a + 1 );
6238      return merge_origins(oLo, oHi);
6239   }
6240
6241   lineoff = oc_line_offset(a);
6242   byteoff = a & 3; /* 0 or 2 */
6243
6244   if (OC_ENABLE_ASSERTIONS) {
6245      tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE);
6246   }
6247   line = find_OCacheLine( a );
6248
6249   descr = line->descr[lineoff];
6250   if (OC_ENABLE_ASSERTIONS) {
6251      tl_assert(descr < 0x10);
6252   }
6253
6254   if (LIKELY(0 == (descr & (3 << byteoff)))) {
6255      return 0;
6256   } else {
6257      return line->w32[lineoff];
6258   }
6259}
6260
6261UWord VG_REGPARM(1) MC_(helperc_b_load4)( Addr a ) {
6262   OCacheLine* line;
6263   UChar descr;
6264   UWord lineoff;
6265
6266   if (UNLIKELY(a & 3)) {
6267      /* Handle misaligned case, slowly. */
6268      UInt oLo   = (UInt)MC_(helperc_b_load2)( a + 0 );
6269      UInt oHi   = (UInt)MC_(helperc_b_load2)( a + 2 );
6270      return merge_origins(oLo, oHi);
6271   }
6272
6273   lineoff = oc_line_offset(a);
6274   if (OC_ENABLE_ASSERTIONS) {
6275      tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE);
6276   }
6277
6278   line = find_OCacheLine( a );
6279
6280   descr = line->descr[lineoff];
6281   if (OC_ENABLE_ASSERTIONS) {
6282      tl_assert(descr < 0x10);
6283   }
6284
6285   if (LIKELY(0 == descr)) {
6286      return 0;
6287   } else {
6288      return line->w32[lineoff];
6289   }
6290}
6291
6292UWord VG_REGPARM(1) MC_(helperc_b_load8)( Addr a ) {
6293   OCacheLine* line;
6294   UChar descrLo, descrHi, descr;
6295   UWord lineoff;
6296
6297   if (UNLIKELY(a & 7)) {
6298      /* Handle misaligned case, slowly. */
6299      UInt oLo   = (UInt)MC_(helperc_b_load4)( a + 0 );
6300      UInt oHi   = (UInt)MC_(helperc_b_load4)( a + 4 );
6301      return merge_origins(oLo, oHi);
6302   }
6303
6304   lineoff = oc_line_offset(a);
6305   if (OC_ENABLE_ASSERTIONS) {
6306      tl_assert(lineoff == (lineoff & 6)); /*0,2,4,6*//*since 8-aligned*/
6307   }
6308
6309   line = find_OCacheLine( a );
6310
6311   descrLo = line->descr[lineoff + 0];
6312   descrHi = line->descr[lineoff + 1];
6313   descr   = descrLo | descrHi;
6314   if (OC_ENABLE_ASSERTIONS) {
6315      tl_assert(descr < 0x10);
6316   }
6317
6318   if (LIKELY(0 == descr)) {
6319      return 0; /* both 32-bit chunks are defined */
6320   } else {
6321      UInt oLo = descrLo == 0 ? 0 : line->w32[lineoff + 0];
6322      UInt oHi = descrHi == 0 ? 0 : line->w32[lineoff + 1];
6323      return merge_origins(oLo, oHi);
6324   }
6325}
6326
6327UWord VG_REGPARM(1) MC_(helperc_b_load16)( Addr a ) {
6328   UInt oLo   = (UInt)MC_(helperc_b_load8)( a + 0 );
6329   UInt oHi   = (UInt)MC_(helperc_b_load8)( a + 8 );
6330   UInt oBoth = merge_origins(oLo, oHi);
6331   return (UWord)oBoth;
6332}
6333
6334UWord VG_REGPARM(1) MC_(helperc_b_load32)( Addr a ) {
6335   UInt oQ0   = (UInt)MC_(helperc_b_load8)( a + 0 );
6336   UInt oQ1   = (UInt)MC_(helperc_b_load8)( a + 8 );
6337   UInt oQ2   = (UInt)MC_(helperc_b_load8)( a + 16 );
6338   UInt oQ3   = (UInt)MC_(helperc_b_load8)( a + 24 );
6339   UInt oAll  = merge_origins(merge_origins(oQ0, oQ1),
6340                              merge_origins(oQ2, oQ3));
6341   return (UWord)oAll;
6342}
6343
6344
6345/*--------------------------------------------*/
6346/*--- Origin tracking: store handlers      ---*/
6347/*--------------------------------------------*/
6348
6349void VG_REGPARM(2) MC_(helperc_b_store1)( Addr a, UWord d32 ) {
6350   OCacheLine* line;
6351   UWord lineoff = oc_line_offset(a);
6352   UWord byteoff = a & 3; /* 0, 1, 2 or 3 */
6353
6354   if (OC_ENABLE_ASSERTIONS) {
6355      tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE);
6356   }
6357
6358   line = find_OCacheLine( a );
6359
6360   if (d32 == 0) {
6361      line->descr[lineoff] &= ~(1 << byteoff);
6362   } else {
6363      line->descr[lineoff] |= (1 << byteoff);
6364      line->w32[lineoff] = d32;
6365   }
6366}
6367
6368void VG_REGPARM(2) MC_(helperc_b_store2)( Addr a, UWord d32 ) {
6369   OCacheLine* line;
6370   UWord lineoff, byteoff;
6371
6372   if (UNLIKELY(a & 1)) {
6373      /* Handle misaligned case, slowly. */
6374      MC_(helperc_b_store1)( a + 0, d32 );
6375      MC_(helperc_b_store1)( a + 1, d32 );
6376      return;
6377   }
6378
6379   lineoff = oc_line_offset(a);
6380   byteoff = a & 3; /* 0 or 2 */
6381
6382   if (OC_ENABLE_ASSERTIONS) {
6383      tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE);
6384   }
6385
6386   line = find_OCacheLine( a );
6387
6388   if (d32 == 0) {
6389      line->descr[lineoff] &= ~(3 << byteoff);
6390   } else {
6391      line->descr[lineoff] |= (3 << byteoff);
6392      line->w32[lineoff] = d32;
6393   }
6394}
6395
6396void VG_REGPARM(2) MC_(helperc_b_store4)( Addr a, UWord d32 ) {
6397   OCacheLine* line;
6398   UWord lineoff;
6399
6400   if (UNLIKELY(a & 3)) {
6401      /* Handle misaligned case, slowly. */
6402      MC_(helperc_b_store2)( a + 0, d32 );
6403      MC_(helperc_b_store2)( a + 2, d32 );
6404      return;
6405   }
6406
6407   lineoff = oc_line_offset(a);
6408   if (OC_ENABLE_ASSERTIONS) {
6409      tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE);
6410   }
6411
6412   line = find_OCacheLine( a );
6413
6414   if (d32 == 0) {
6415      line->descr[lineoff] = 0;
6416   } else {
6417      line->descr[lineoff] = 0xF;
6418      line->w32[lineoff] = d32;
6419   }
6420}
6421
6422void VG_REGPARM(2) MC_(helperc_b_store8)( Addr a, UWord d32 ) {
6423   OCacheLine* line;
6424   UWord lineoff;
6425
6426   if (UNLIKELY(a & 7)) {
6427      /* Handle misaligned case, slowly. */
6428      MC_(helperc_b_store4)( a + 0, d32 );
6429      MC_(helperc_b_store4)( a + 4, d32 );
6430      return;
6431   }
6432
6433   lineoff = oc_line_offset(a);
6434   if (OC_ENABLE_ASSERTIONS) {
6435      tl_assert(lineoff == (lineoff & 6)); /*0,2,4,6*//*since 8-aligned*/
6436   }
6437
6438   line = find_OCacheLine( a );
6439
6440   if (d32 == 0) {
6441      line->descr[lineoff + 0] = 0;
6442      line->descr[lineoff + 1] = 0;
6443   } else {
6444      line->descr[lineoff + 0] = 0xF;
6445      line->descr[lineoff + 1] = 0xF;
6446      line->w32[lineoff + 0] = d32;
6447      line->w32[lineoff + 1] = d32;
6448   }
6449}
6450
6451void VG_REGPARM(2) MC_(helperc_b_store16)( Addr a, UWord d32 ) {
6452   MC_(helperc_b_store8)( a + 0, d32 );
6453   MC_(helperc_b_store8)( a + 8, d32 );
6454}
6455
6456void VG_REGPARM(2) MC_(helperc_b_store32)( Addr a, UWord d32 ) {
6457   MC_(helperc_b_store8)( a +  0, d32 );
6458   MC_(helperc_b_store8)( a +  8, d32 );
6459   MC_(helperc_b_store8)( a + 16, d32 );
6460   MC_(helperc_b_store8)( a + 24, d32 );
6461}
6462
6463
6464/*--------------------------------------------*/
6465/*--- Origin tracking: sarp handlers       ---*/
6466/*--------------------------------------------*/
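
/* Both routines below follow the same pattern: peel off a leading byte
   and/or 2-byte chunk to reach 4-byte alignment, stream the bulk of the
   range with 4-byte stores, then mop up any trailing 2-byte chunk and byte.
   Set_Origins writes the supplied otag; Clear_Origins writes 0. */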
6467
6468__attribute__((noinline))
6469static void ocache_sarp_Set_Origins ( Addr a, UWord len, UInt otag ) {
6470   if ((a & 1) && len >= 1) {
6471      MC_(helperc_b_store1)( a, otag );
6472      a++;
6473      len--;
6474   }
6475   if ((a & 2) && len >= 2) {
6476      MC_(helperc_b_store2)( a, otag );
6477      a += 2;
6478      len -= 2;
6479   }
6480   if (len >= 4)
6481      tl_assert(0 == (a & 3));
6482   while (len >= 4) {
6483      MC_(helperc_b_store4)( a, otag );
6484      a += 4;
6485      len -= 4;
6486   }
6487   if (len >= 2) {
6488      MC_(helperc_b_store2)( a, otag );
6489      a += 2;
6490      len -= 2;
6491   }
6492   if (len >= 1) {
6493      MC_(helperc_b_store1)( a, otag );
6494      //a++;
6495      len--;
6496   }
6497   tl_assert(len == 0);
6498}
6499
6500__attribute__((noinline))
6501static void ocache_sarp_Clear_Origins ( Addr a, UWord len ) {
6502   if ((a & 1) && len >= 1) {
6503      MC_(helperc_b_store1)( a, 0 );
6504      a++;
6505      len--;
6506   }
6507   if ((a & 2) && len >= 2) {
6508      MC_(helperc_b_store2)( a, 0 );
6509      a += 2;
6510      len -= 2;
6511   }
6512   if (len >= 4)
6513      tl_assert(0 == (a & 3));
6514   while (len >= 4) {
6515      MC_(helperc_b_store4)( a, 0 );
6516      a += 4;
6517      len -= 4;
6518   }
6519   if (len >= 2) {
6520      MC_(helperc_b_store2)( a, 0 );
6521      a += 2;
6522      len -= 2;
6523   }
6524   if (len >= 1) {
6525      MC_(helperc_b_store1)( a, 0 );
6526      //a++;
6527      len--;
6528   }
6529   tl_assert(len == 0);
6530}
6531
6532
6533/*------------------------------------------------------------*/
6534/*--- Setup and finalisation                               ---*/
6535/*------------------------------------------------------------*/
6536
6537static void mc_post_clo_init ( void )
6538{
6539   /* If we've been asked to emit XML, mash around various other
6540      options so as to constrain the output somewhat. */
6541   if (VG_(clo_xml)) {
6542      /* Extract as much info as possible from the leak checker. */
6543      MC_(clo_leak_check) = LC_Full;
6544   }
6545
6546   if (MC_(clo_freelist_big_blocks) >= MC_(clo_freelist_vol))
6547      VG_(message)(Vg_UserMsg,
6548                   "Warning: --freelist-big-blocks value %lld has no effect\n"
6549                   "as it is >= the --freelist-vol value %lld\n",
6550                   MC_(clo_freelist_big_blocks),
6551                   MC_(clo_freelist_vol));
6552
6553   tl_assert( MC_(clo_mc_level) >= 1 && MC_(clo_mc_level) <= 3 );
6554
6555   if (MC_(clo_mc_level) == 3) {
6556      /* We're doing origin tracking. */
6557#     ifdef PERF_FAST_STACK
6558      VG_(track_new_mem_stack_4_w_ECU)   ( mc_new_mem_stack_4_w_ECU   );
6559      VG_(track_new_mem_stack_8_w_ECU)   ( mc_new_mem_stack_8_w_ECU   );
6560      VG_(track_new_mem_stack_12_w_ECU)  ( mc_new_mem_stack_12_w_ECU  );
6561      VG_(track_new_mem_stack_16_w_ECU)  ( mc_new_mem_stack_16_w_ECU  );
6562      VG_(track_new_mem_stack_32_w_ECU)  ( mc_new_mem_stack_32_w_ECU  );
6563      VG_(track_new_mem_stack_112_w_ECU) ( mc_new_mem_stack_112_w_ECU );
6564      VG_(track_new_mem_stack_128_w_ECU) ( mc_new_mem_stack_128_w_ECU );
6565      VG_(track_new_mem_stack_144_w_ECU) ( mc_new_mem_stack_144_w_ECU );
6566      VG_(track_new_mem_stack_160_w_ECU) ( mc_new_mem_stack_160_w_ECU );
6567#     endif
6568      VG_(track_new_mem_stack_w_ECU)     ( mc_new_mem_stack_w_ECU     );
6569      VG_(track_new_mem_stack_signal)    ( mc_new_mem_w_tid_make_ECU );
6570   } else {
6571      /* Not doing origin tracking */
6572#     ifdef PERF_FAST_STACK
6573      VG_(track_new_mem_stack_4)   ( mc_new_mem_stack_4   );
6574      VG_(track_new_mem_stack_8)   ( mc_new_mem_stack_8   );
6575      VG_(track_new_mem_stack_12)  ( mc_new_mem_stack_12  );
6576      VG_(track_new_mem_stack_16)  ( mc_new_mem_stack_16  );
6577      VG_(track_new_mem_stack_32)  ( mc_new_mem_stack_32  );
6578      VG_(track_new_mem_stack_112) ( mc_new_mem_stack_112 );
6579      VG_(track_new_mem_stack_128) ( mc_new_mem_stack_128 );
6580      VG_(track_new_mem_stack_144) ( mc_new_mem_stack_144 );
6581      VG_(track_new_mem_stack_160) ( mc_new_mem_stack_160 );
6582#     endif
6583      VG_(track_new_mem_stack)     ( mc_new_mem_stack     );
6584      VG_(track_new_mem_stack_signal) ( mc_new_mem_w_tid_no_ECU );
6585   }
6586
6587   // We assume that brk()/sbrk() does not initialise new memory.  Is this
6588   // accurate?  John Reiser says:
6589   //
6590   //   0) sbrk() can *decrease* process address space.  No zero fill is done
6591   //   for a decrease, not even the fragment on the high end of the last page
6592   //   that is beyond the new highest address.  For maximum safety and
6593   //   portability, then the bytes in the last page that reside above [the
6594   //   new] sbrk(0) should be considered to be uninitialized, but in practice
6595   //   it is exceedingly likely that they will retain their previous
6596   //   contents.
6597   //
6598   //   1) If an increase is large enough to require new whole pages, then
6599   //   those new whole pages (like all new pages) are zero-filled by the
6600   //   operating system.  So if sbrk(0) already is page aligned, then
6601   //   sbrk(PAGE_SIZE) *does* zero-fill the new memory.
6602   //
6603   //   2) Any increase that lies within an existing allocated page is not
6604   //   changed.  So if (x = sbrk(0)) is not page aligned, then
6605   //   sbrk(PAGE_SIZE) yields ((PAGE_SIZE -1) & -x) bytes which keep their
6606   //   existing contents, and an additional PAGE_SIZE bytes which are zeroed.
6607   //   ((PAGE_SIZE -1) & x) of them are "covered" by the sbrk(), and the rest
6608   //   of them come along for the ride because the operating system deals
6609   //   only in whole pages.  Again, for maximum safety and portability, then
6610   //   anything that lives above [the new] sbrk(0) should be considered
6611   //   uninitialized, but in practice will retain previous contents [zero in
6612   //   this case.]"
6613   //
6614   // In short:
6615   //
6616   //   A key property of sbrk/brk is that new whole pages that are supplied
6617   //   by the operating system *do* get initialized to zero.
6618   //
6619   // As for the portability of all this:
6620   //
6621   //   sbrk and brk are not POSIX.  However, any system that is a derivative
6622   //   of *nix has sbrk and brk because too much software (such as the
6623   //   Bourne shell) relies on the traditional memory map (.text,
6624   //   .data+.bss, stack) and the existence of sbrk/brk.
6625   //
6626   // So we should arguably observe all this.  However:
6627   // - The current inaccuracy has caused maybe one complaint in seven years(?)
6628   // - Relying on the zeroed-ness of whole brk'd pages is pretty grotty... I
6629   //   doubt most programmers know the above information.
6630   // So I'm not terribly unhappy with marking it as undefined. --njn.
6631   //
6632   // [More:  I think most of what John said only applies to sbrk().  It seems
6633   // that brk() always deals in whole pages.  And since this event deals
6634   // directly with brk(), not with sbrk(), perhaps it would be reasonable to
6635   // just mark all memory it allocates as defined.]
6636   //
6637   if (MC_(clo_mc_level) == 3)
6638      VG_(track_new_mem_brk)         ( mc_new_mem_w_tid_make_ECU );
6639   else
6640      VG_(track_new_mem_brk)         ( mc_new_mem_w_tid_no_ECU );
6641
6642   /* This origin tracking cache is huge (~100M), so only initialise
6643      if we need it. */
6644   if (MC_(clo_mc_level) >= 3) {
6645      init_OCache();
6646      tl_assert(ocacheL1 != NULL);
6647      tl_assert(ocacheL2 != NULL);
6648   } else {
6649      tl_assert(ocacheL1 == NULL);
6650      tl_assert(ocacheL2 == NULL);
6651   }
6652
6653   MC_(chunk_poolalloc) = VG_(newPA)
6654      (sizeof(MC_Chunk) + MC_(n_where_pointers)() * sizeof(ExeContext*),
6655       1000,
6656       VG_(malloc),
6657       "mc.cMC.1 (MC_Chunk pools)",
6658       VG_(free));
6659
6660   /* Do not check definedness of guest state if --undef-value-errors=no */
6661   if (MC_(clo_mc_level) >= 2)
6662      VG_(track_pre_reg_read) ( mc_pre_reg_read );
6663}
6664
6665static void print_SM_info(const HChar* type, Int n_SMs)
6666{
6667   VG_(message)(Vg_DebugMsg,
6668      " memcheck: SMs: %s = %d (%ldk, %ldM)\n",
6669      type,
6670      n_SMs,
6671      n_SMs * sizeof(SecMap) / 1024UL,
6672      n_SMs * sizeof(SecMap) / (1024 * 1024UL) );
6673}
6674
6675static void mc_print_stats (void)
6676{
6677   SizeT max_secVBit_szB, max_SMs_szB, max_shmem_szB;
6678
6679   VG_(message)(Vg_DebugMsg, " memcheck: freelist: vol %lld length %lld\n",
6680                VG_(free_queue_volume), VG_(free_queue_length));
6681   VG_(message)(Vg_DebugMsg,
6682      " memcheck: sanity checks: %d cheap, %d expensive\n",
6683      n_sanity_cheap, n_sanity_expensive );
6684   VG_(message)(Vg_DebugMsg,
6685      " memcheck: auxmaps: %lld auxmap entries (%lldk, %lldM) in use\n",
6686      n_auxmap_L2_nodes,
6687      n_auxmap_L2_nodes * 64,
6688      n_auxmap_L2_nodes / 16 );
6689   VG_(message)(Vg_DebugMsg,
6690      " memcheck: auxmaps_L1: %lld searches, %lld cmps, ratio %lld:10\n",
6691      n_auxmap_L1_searches, n_auxmap_L1_cmps,
6692      (10ULL * n_auxmap_L1_cmps)
6693         / (n_auxmap_L1_searches ? n_auxmap_L1_searches : 1)
6694   );
6695   VG_(message)(Vg_DebugMsg,
6696      " memcheck: auxmaps_L2: %lld searches, %lld nodes\n",
6697      n_auxmap_L2_searches, n_auxmap_L2_nodes
6698   );
6699
6700   print_SM_info("n_issued     ", n_issued_SMs);
6701   print_SM_info("n_deissued   ", n_deissued_SMs);
6702   print_SM_info("max_noaccess ", max_noaccess_SMs);
6703   print_SM_info("max_undefined", max_undefined_SMs);
6704   print_SM_info("max_defined  ", max_defined_SMs);
6705   print_SM_info("max_non_DSM  ", max_non_DSM_SMs);
6706
6707   // Three DSMs, plus the non-DSM ones
6708   max_SMs_szB = (3 + max_non_DSM_SMs) * sizeof(SecMap);
6709   // The 3*sizeof(Word) bytes are the AVL node metadata size.
6710   // The VG_ROUNDUP is because the OSet pool allocator will/must align
6711   // the elements on pointer size.
6712   // Note that the pool allocator has some additional small overhead
6713   // which is not counted in the below.
6714   // Hardwiring this logic sucks, but I don't see how else to do it.
6715   max_secVBit_szB = max_secVBit_nodes *
6716         (3*sizeof(Word) + VG_ROUNDUP(sizeof(SecVBitNode), sizeof(void*)));
6717   max_shmem_szB   = sizeof(primary_map) + max_SMs_szB + max_secVBit_szB;
6718
6719   VG_(message)(Vg_DebugMsg,
6720      " memcheck: max sec V bit nodes:    %d (%ldk, %ldM)\n",
6721      max_secVBit_nodes, max_secVBit_szB / 1024,
6722                         max_secVBit_szB / (1024 * 1024));
6723   VG_(message)(Vg_DebugMsg,
6724      " memcheck: set_sec_vbits8 calls: %llu (new: %llu, updates: %llu)\n",
6725      sec_vbits_new_nodes + sec_vbits_updates,
6726      sec_vbits_new_nodes, sec_vbits_updates );
6727   VG_(message)(Vg_DebugMsg,
6728      " memcheck: max shadow mem size:   %ldk, %ldM\n",
6729      max_shmem_szB / 1024, max_shmem_szB / (1024 * 1024));
6730
6731   if (MC_(clo_mc_level) >= 3) {
6732      VG_(message)(Vg_DebugMsg,
6733                   " ocacheL1: %'12lu refs   %'12lu misses (%'lu lossage)\n",
6734                   stats_ocacheL1_find,
6735                   stats_ocacheL1_misses,
6736                   stats_ocacheL1_lossage );
6737      VG_(message)(Vg_DebugMsg,
6738                   " ocacheL1: %'12lu at 0   %'12lu at 1\n",
6739                   stats_ocacheL1_find - stats_ocacheL1_misses
6740                      - stats_ocacheL1_found_at_1
6741                      - stats_ocacheL1_found_at_N,
6742                   stats_ocacheL1_found_at_1 );
6743      VG_(message)(Vg_DebugMsg,
6744                   " ocacheL1: %'12lu at 2+  %'12lu move-fwds\n",
6745                   stats_ocacheL1_found_at_N,
6746                   stats_ocacheL1_movefwds );
6747      VG_(message)(Vg_DebugMsg,
6748                   " ocacheL1: %'12lu sizeB  %'12u useful\n",
6749                   (UWord)sizeof(OCache),
6750                   4 * OC_W32S_PER_LINE * OC_LINES_PER_SET * OC_N_SETS );
6751      VG_(message)(Vg_DebugMsg,
6752                   " ocacheL2: %'12lu refs   %'12lu misses\n",
6753                   stats__ocacheL2_refs,
6754                   stats__ocacheL2_misses );
6755      VG_(message)(Vg_DebugMsg,
6756                   " ocacheL2:    %'9lu max nodes %'9lu curr nodes\n",
6757                   stats__ocacheL2_n_nodes_max,
6758                   stats__ocacheL2_n_nodes );
6759      VG_(message)(Vg_DebugMsg,
6760                   " niacache: %'12lu refs   %'12lu misses\n",
6761                   stats__nia_cache_queries, stats__nia_cache_misses);
6762   } else {
6763      tl_assert(ocacheL1 == NULL);
6764      tl_assert(ocacheL2 == NULL);
6765   }
6766}
6767
6768
6769static void mc_fini ( Int exitcode )
6770{
6771   MC_(print_malloc_stats)();
6772
6773   if (MC_(clo_leak_check) != LC_Off) {
6774      LeakCheckParams lcp;
6775      lcp.mode = MC_(clo_leak_check);
6776      lcp.show_leak_kinds = MC_(clo_show_leak_kinds);
6777      lcp.heuristics = MC_(clo_leak_check_heuristics);
6778      lcp.errors_for_leak_kinds = MC_(clo_error_for_leak_kinds);
6779      lcp.deltamode = LCD_Any;
6780      lcp.max_loss_records_output = 999999999;
6781      lcp.requested_by_monitor_command = False;
6782      MC_(detect_memory_leaks)(1/*bogus ThreadId*/, &lcp);
6783   } else {
6784      if (VG_(clo_verbosity) == 1 && !VG_(clo_xml)) {
6785         VG_(umsg)(
6786            "For a detailed leak analysis, rerun with: --leak-check=full\n"
6787            "\n"
6788         );
6789      }
6790   }
6791
6792   if (VG_(clo_verbosity) == 1 && !VG_(clo_xml)) {
6793      VG_(message)(Vg_UserMsg,
6794                   "For counts of detected and suppressed errors, rerun with: -v\n");
6795   }
6796
6797   if (MC_(any_value_errors) && !VG_(clo_xml) && VG_(clo_verbosity) >= 1
6798       && MC_(clo_mc_level) == 2) {
6799      VG_(message)(Vg_UserMsg,
6800                   "Use --track-origins=yes to see where "
6801                   "uninitialised values come from\n");
6802   }
6803
6804   /* Print a warning if any client-request generated ignore-ranges
6805      still exist.  It would be reasonable to expect that a properly
6806      written program would remove any such ranges before exiting, and
6807      since they are a bit on the dangerous side, it is worth flagging them.  By
6808      contrast, ranges which are specified on the command line normally
6809      pertain to hardware mapped into the address space, and so we
6810      can't expect the client to have got rid of them. */
6811   if (gIgnoredAddressRanges) {
6812      Word i, nBad = 0;
6813      for (i = 0; i < VG_(sizeRangeMap)(gIgnoredAddressRanges); i++) {
6814         UWord val     = IAR_INVALID;
6815         UWord key_min = ~(UWord)0;
6816         UWord key_max = (UWord)0;
6817         VG_(indexRangeMap)( &key_min, &key_max, &val,
6818                             gIgnoredAddressRanges, i );
6819         if (val != IAR_ClientReq)
6820           continue;
6821         /* Print the offending range.  Also, if it is the first,
6822            print a banner before it. */
6823         nBad++;
6824         if (nBad == 1) {
6825            VG_(umsg)(
6826              "WARNING: exiting program has the following client-requested\n"
6827              "WARNING: address error disablement range(s) still in force,\n"
6828              "WARNING: "
6829                 "possibly as a result of some mistake in the use of the\n"
6830              "WARNING: "
6831                 "VALGRIND_{DISABLE,ENABLE}_ERROR_REPORTING_IN_RANGE macros.\n"
6832            );
6833         }
6834         VG_(umsg)("   [%ld]  0x%016llx-0x%016llx  %s\n",
6835                   i, (ULong)key_min, (ULong)key_max, showIARKind(val));
6836      }
6837   }
6838
6839   done_prof_mem();
6840
6841   if (VG_(clo_stats))
6842      mc_print_stats();
6843
6844   if (0) {
6845      VG_(message)(Vg_DebugMsg,
6846        "------ Valgrind's client block stats follow ---------------\n" );
6847      show_client_block_stats();
6848   }
6849}
6850
6851/* mark the given addr/len unaddressable for watchpoint implementation
6852   The PointKind will be handled at access time */
6853static Bool mc_mark_unaddressable_for_watchpoint (PointKind kind, Bool insert,
6854                                                  Addr addr, SizeT len)
6855{
6856   /* GDBTD this is somewhat fishy. We might rather have to save the previous
6857      accessibility and definedness in gdbserver so as to allow restoring it
6858      properly. Currently, we assume that the user only watches things
6859      which are properly addressable and defined */
6860   if (insert)
6861      MC_(make_mem_noaccess) (addr, len);
6862   else
6863      MC_(make_mem_defined)  (addr, len);
6864   return True;
6865}
6866
6867static void mc_pre_clo_init(void)
6868{
6869   VG_(details_name)            ("Memcheck");
6870   VG_(details_version)         (NULL);
6871   VG_(details_description)     ("a memory error detector");
6872   VG_(details_copyright_author)(
6873      "Copyright (C) 2002-2013, and GNU GPL'd, by Julian Seward et al.");
6874   VG_(details_bug_reports_to)  (VG_BUGS_TO);
6875   VG_(details_avg_translation_sizeB) ( 640 );
6876
6877   VG_(basic_tool_funcs)          (mc_post_clo_init,
6878                                   MC_(instrument),
6879                                   mc_fini);
6880
6881   VG_(needs_final_IR_tidy_pass)  ( MC_(final_tidy) );
6882
6883
6884   VG_(needs_core_errors)         ();
6885   VG_(needs_tool_errors)         (MC_(eq_Error),
6886                                   MC_(before_pp_Error),
6887                                   MC_(pp_Error),
6888                                   True,/*show TIDs for errors*/
6889                                   MC_(update_Error_extra),
6890                                   MC_(is_recognised_suppression),
6891                                   MC_(read_extra_suppression_info),
6892                                   MC_(error_matches_suppression),
6893                                   MC_(get_error_name),
6894                                   MC_(get_extra_suppression_info),
6895                                   MC_(print_extra_suppression_use),
6896                                   MC_(update_extra_suppression_use));
6897   VG_(needs_libc_freeres)        ();
6898   VG_(needs_command_line_options)(mc_process_cmd_line_options,
6899                                   mc_print_usage,
6900                                   mc_print_debug_usage);
6901   VG_(needs_client_requests)     (mc_handle_client_request);
6902   VG_(needs_sanity_checks)       (mc_cheap_sanity_check,
6903                                   mc_expensive_sanity_check);
6904   VG_(needs_print_stats)         (mc_print_stats);
6905   VG_(needs_info_location)       (MC_(pp_describe_addr));
6906   VG_(needs_malloc_replacement)  (MC_(malloc),
6907                                   MC_(__builtin_new),
6908                                   MC_(__builtin_vec_new),
6909                                   MC_(memalign),
6910                                   MC_(calloc),
6911                                   MC_(free),
6912                                   MC_(__builtin_delete),
6913                                   MC_(__builtin_vec_delete),
6914                                   MC_(realloc),
6915                                   MC_(malloc_usable_size),
6916                                   MC_MALLOC_DEFAULT_REDZONE_SZB );
6917   MC_(Malloc_Redzone_SzB) = VG_(malloc_effective_client_redzone_size)();
6918
6919   VG_(needs_xml_output)          ();
6920
6921   VG_(track_new_mem_startup)     ( mc_new_mem_startup );
6922
6923   // Handling of mmap and mprotect isn't simple (well, it is simple,
6924   // but the justification isn't).  See comments above, just prior to
6925   // mc_new_mem_mmap.
6926   VG_(track_new_mem_mmap)        ( mc_new_mem_mmap );
6927   VG_(track_change_mem_mprotect) ( mc_new_mem_mprotect );
6928
6929   VG_(track_copy_mem_remap)      ( MC_(copy_address_range_state) );
6930
6931   VG_(track_die_mem_stack_signal)( MC_(make_mem_noaccess) );
6932   VG_(track_die_mem_brk)         ( MC_(make_mem_noaccess) );
6933   VG_(track_die_mem_munmap)      ( MC_(make_mem_noaccess) );
6934
6935   /* Defer the specification of the new_mem_stack functions to the
6936      post_clo_init function, since we need to first parse the command
6937      line before deciding which set to use. */
6938
6939#  ifdef PERF_FAST_STACK
6940   VG_(track_die_mem_stack_4)     ( mc_die_mem_stack_4   );
6941   VG_(track_die_mem_stack_8)     ( mc_die_mem_stack_8   );
6942   VG_(track_die_mem_stack_12)    ( mc_die_mem_stack_12  );
6943   VG_(track_die_mem_stack_16)    ( mc_die_mem_stack_16  );
6944   VG_(track_die_mem_stack_32)    ( mc_die_mem_stack_32  );
6945   VG_(track_die_mem_stack_112)   ( mc_die_mem_stack_112 );
6946   VG_(track_die_mem_stack_128)   ( mc_die_mem_stack_128 );
6947   VG_(track_die_mem_stack_144)   ( mc_die_mem_stack_144 );
6948   VG_(track_die_mem_stack_160)   ( mc_die_mem_stack_160 );
6949#  endif
6950   VG_(track_die_mem_stack)       ( mc_die_mem_stack     );
6951
6952   VG_(track_ban_mem_stack)       ( MC_(make_mem_noaccess) );
6953
6954   VG_(track_pre_mem_read)        ( check_mem_is_defined );
6955   VG_(track_pre_mem_read_asciiz) ( check_mem_is_defined_asciiz );
6956   VG_(track_pre_mem_write)       ( check_mem_is_addressable );
6957   VG_(track_post_mem_write)      ( mc_post_mem_write );
6958
6959   VG_(track_post_reg_write)                  ( mc_post_reg_write );
6960   VG_(track_post_reg_write_clientcall_return)( mc_post_reg_write_clientcall );
6961
6962   VG_(needs_watchpoint)          ( mc_mark_unaddressable_for_watchpoint );
6963
6964   init_shadow_memory();
6965   // MC_(chunk_poolalloc) must be allocated in post_clo_init
6966   tl_assert(MC_(chunk_poolalloc) == NULL);
6967   MC_(malloc_list)  = VG_(HT_construct)( "MC_(malloc_list)" );
6968   MC_(mempool_list) = VG_(HT_construct)( "MC_(mempool_list)" );
6969   init_prof_mem();
6970
6971   tl_assert( mc_expensive_sanity_check() );
6972
6973   // {LOADV,STOREV}[8421] will all fail horribly if this isn't true.
6974   tl_assert(sizeof(UWord) == sizeof(Addr));
6975   // Call me paranoid.  I don't care.
6976   tl_assert(sizeof(void*) == sizeof(Addr));
6977
6978   // BYTES_PER_SEC_VBIT_NODE must be a power of two.
6979   tl_assert(-1 != VG_(log2)(BYTES_PER_SEC_VBIT_NODE));
6980
6981   /* This is small.  Always initialise it. */
6982   init_nia_to_ecu_cache();
6983
6984   /* We can't initialise ocacheL1/ocacheL2 yet, since we don't know
6985      if we need to, since the command line args haven't been
6986      processed yet.  Hence defer it to mc_post_clo_init. */
6987   tl_assert(ocacheL1 == NULL);
6988   tl_assert(ocacheL2 == NULL);
6989
6990   /* Check some important stuff.  See extensive comments above
6991      re UNALIGNED_OR_HIGH for background. */
6992#  if VG_WORDSIZE == 4
6993   tl_assert(sizeof(void*) == 4);
6994   tl_assert(sizeof(Addr)  == 4);
6995   tl_assert(sizeof(UWord) == 4);
6996   tl_assert(sizeof(Word)  == 4);
6997   tl_assert(MAX_PRIMARY_ADDRESS == 0xFFFFFFFFUL);
6998   tl_assert(MASK(1) == 0UL);
6999   tl_assert(MASK(2) == 1UL);
7000   tl_assert(MASK(4) == 3UL);
7001   tl_assert(MASK(8) == 7UL);
7002#  else
7003   tl_assert(VG_WORDSIZE == 8);
7004   tl_assert(sizeof(void*) == 8);
7005   tl_assert(sizeof(Addr)  == 8);
7006   tl_assert(sizeof(UWord) == 8);
7007   tl_assert(sizeof(Word)  == 8);
7008   tl_assert(MAX_PRIMARY_ADDRESS == 0xFFFFFFFFFULL);
7009   tl_assert(MASK(1) == 0xFFFFFFF000000000ULL);
7010   tl_assert(MASK(2) == 0xFFFFFFF000000001ULL);
7011   tl_assert(MASK(4) == 0xFFFFFFF000000003ULL);
7012   tl_assert(MASK(8) == 0xFFFFFFF000000007ULL);
7013#  endif
7014}
7015
7016VG_DETERMINE_INTERFACE_VERSION(mc_pre_clo_init)
7017
7018/*--------------------------------------------------------------------*/
7019/*--- end                                                mc_main.c ---*/
7020/*--------------------------------------------------------------------*/
7021