1
2/*--------------------------------------------------------------------*/
3/*--- MemCheck: Maintain bitmaps of memory, tracking the           ---*/
4/*--- accessibility (A) and validity (V) status of each byte.      ---*/
5/*---                                                    mc_main.c ---*/
6/*--------------------------------------------------------------------*/
7
8/*
9   This file is part of MemCheck, a heavyweight Valgrind tool for
10   detecting memory errors.
11
12   Copyright (C) 2000-2012 Julian Seward
13      jseward@acm.org
14
15   This program is free software; you can redistribute it and/or
16   modify it under the terms of the GNU General Public License as
17   published by the Free Software Foundation; either version 2 of the
18   License, or (at your option) any later version.
19
20   This program is distributed in the hope that it will be useful, but
21   WITHOUT ANY WARRANTY; without even the implied warranty of
22   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
23   General Public License for more details.
24
25   You should have received a copy of the GNU General Public License
26   along with this program; if not, write to the Free Software
27   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
28   02111-1307, USA.
29
30   The GNU General Public License is contained in the file COPYING.
31*/
32
33#include "pub_tool_basics.h"
34#include "pub_tool_aspacemgr.h"
35#include "pub_tool_gdbserver.h"
36#include "pub_tool_poolalloc.h"
37#include "pub_tool_hashtable.h"     // For mc_include.h
38#include "pub_tool_libcbase.h"
39#include "pub_tool_libcassert.h"
40#include "pub_tool_libcprint.h"
41#include "pub_tool_machine.h"
42#include "pub_tool_mallocfree.h"
43#include "pub_tool_options.h"
44#include "pub_tool_oset.h"
45#include "pub_tool_replacemalloc.h"
46#include "pub_tool_tooliface.h"
47#include "pub_tool_threadstate.h"
48
49#include "mc_include.h"
50#include "memcheck.h"   /* for client requests */
51
52
53/* Set to 1 to do a little more sanity checking */
54#define VG_DEBUG_MEMORY 0
55
56#define DEBUG(fmt, args...) //VG_(printf)(fmt, ## args)
57
58static void ocache_sarp_Set_Origins ( Addr, UWord, UInt ); /* fwds */
59static void ocache_sarp_Clear_Origins ( Addr, UWord ); /* fwds */
60
61
62/*------------------------------------------------------------*/
63/*--- Fast-case knobs                                      ---*/
64/*------------------------------------------------------------*/
65
66// Comment these out to disable the fast cases (don't just set them to zero).
67
68#define PERF_FAST_LOADV    1
69#define PERF_FAST_STOREV   1
70
71#define PERF_FAST_SARP     1
72
73#define PERF_FAST_STACK    1
74#define PERF_FAST_STACK2   1
75
76/* Change this to 1 to enable assertions on origin tracking cache fast
77   paths */
78#define OC_ENABLE_ASSERTIONS 0
79
80
81/*------------------------------------------------------------*/
82/*--- Comments on the origin tracking implementation       ---*/
83/*------------------------------------------------------------*/
84
85/* See detailed comment entitled
86   AN OVERVIEW OF THE ORIGIN TRACKING IMPLEMENTATION
87   which is contained further on in this file. */
88
89
90/*------------------------------------------------------------*/
91/*--- V bits and A bits                                    ---*/
92/*------------------------------------------------------------*/
93
94/* Conceptually, every byte value has 8 V bits, which track whether Memcheck
95   thinks the corresponding value bit is defined.  And every memory byte
96   has an A bit, which tracks whether Memcheck thinks the program can access
97   it safely (ie. it's mapped, and has at least one of the RWX permission bits
98   set).  So every N-bit register is shadowed with N V bits, and every memory
99   byte is shadowed with 8 V bits and one A bit.
100
101   In the implementation, we use two forms of compression (compressed V bits
102   and distinguished secondary maps) to avoid the 9-bit-per-byte overhead
103   for memory.
104
105   Memcheck also tracks extra information about each heap block that is
106   allocated, for detecting memory leaks and other purposes.
107*/
108
109/*------------------------------------------------------------*/
110/*--- Basic A/V bitmap representation.                     ---*/
111/*------------------------------------------------------------*/
112
113/* All reads and writes are checked against a memory map (a.k.a. shadow
114   memory), which records the state of all memory in the process.
115
116   On 32-bit machines the memory map is organised as follows.
117   The top 16 bits of an address are used to index into a top-level
118   map table, containing 65536 entries.  Each entry is a pointer to a
119   second-level map, which records the accesibililty and validity
120   permissions for the 65536 bytes indexed by the lower 16 bits of the
121   address.  Each byte is represented by two bits (details are below).  So
122   each second-level map contains 16384 bytes.  This two-level arrangement
123   conveniently divides the 4G address space into 64k lumps, each size 64k
124   bytes.
125
126   All entries in the primary (top-level) map must point to a valid
127   secondary (second-level) map.  Since many of the 64kB chunks will
128   have the same status for every bit -- ie. noaccess (for unused
129   address space) or entirely addressable and defined (for code segments) --
130   there are three distinguished secondary maps, which indicate 'noaccess',
131   'undefined' and 'defined'.  For these uniform 64kB chunks, the primary
132   map entry points to the relevant distinguished map.  In practice,
133   typically more than half of the addressable memory is represented with
134   the 'undefined' or 'defined' distinguished secondary map, so it gives a
135   good saving.  It also lets us set the V+A bits of large address regions
136   quickly in set_address_range_perms().
137
138   On 64-bit machines it's more complicated.  If we followed the same basic
139   scheme we'd have a four-level table which would require too many memory
140   accesses.  So instead the top-level map table has 2^19 entries (indexed
141   using bits 16..34 of the address);  this covers the bottom 32GB.  Any
142   accesses above 32GB are handled with a slow, sparse auxiliary table.
143   Valgrind's address space manager tries very hard to keep things below
144   this 32GB barrier so that performance doesn't suffer too much.
145
146   Note that this file has a lot of different functions for reading and
147   writing shadow memory.  Only a couple are strictly necessary (eg.
148   get_vabits2 and set_vabits2), most are just specialised for specific
149   common cases to improve performance.
150
151   Aside: the V+A bits are less precise than they could be -- we have no way
152   of marking memory as read-only.  It would be great if we could add an
153   extra state VA_BITSn_READONLY.  But then we'd have 5 different states,
154   which requires 2.3 bits to hold, and there's no way to do that elegantly
155   -- we'd have to double up to 4 bits of metadata per byte, which doesn't
156   seem worth it.
157*/
158
159/* --------------- Basic configuration --------------- */
160
161/* Only change this.  N_PRIMARY_MAP *must* be a power of 2. */
162
163#if VG_WORDSIZE == 4
164
165/* cover the entire address space */
166#  define N_PRIMARY_BITS  16
167
168#else
169
170/* Just handle the first 32G fast and the rest via auxiliary
171   primaries.  If you change this, Memcheck will assert at startup.
172   See the definition of UNALIGNED_OR_HIGH for extensive comments. */
173#  define N_PRIMARY_BITS  19
174
175#endif
176
177
178/* Do not change this. */
179#define N_PRIMARY_MAP  ( ((UWord)1) << N_PRIMARY_BITS)
180
181/* Do not change this. */
182#define MAX_PRIMARY_ADDRESS (Addr)((((Addr)65536) * N_PRIMARY_MAP)-1)
183
184
185/* --------------- Secondary maps --------------- */
186
187// Each byte of memory conceptually has an A bit, which indicates its
188// addressability, and 8 V bits, which indicates its definedness.
189//
190// But because very few bytes are partially defined, we can use a nice
191// compression scheme to reduce the size of shadow memory.  Each byte of
192// memory has 2 bits which indicates its state (ie. V+A bits):
193//
194//   00:  noaccess    (unaddressable but treated as fully defined)
195//   01:  undefined   (addressable and fully undefined)
196//   10:  defined     (addressable and fully defined)
197//   11:  partdefined (addressable and partially defined)
198//
199// In the "partdefined" case, we use a secondary table to store the V bits.
200// Each entry in the secondary-V-bits table maps a byte address to its 8 V
201// bits.
202//
203// We store the compressed V+A bits in 8-bit chunks, ie. the V+A bits for
204// four bytes (32 bits) of memory are in each chunk.  Hence the name
205// "vabits8".  This lets us get the V+A bits for four bytes at a time
206// easily (without having to do any shifting and/or masking), and that is a
207// very common operation.  (Note that although each vabits8 chunk
208// is 8 bits in size, it represents 32 bits of memory.)
209//
210// The representation is "inverse" little-endian... each 4 bytes of
211// memory is represented by a 1 byte value, where:
212//
213// - the status of byte (a+0) is held in bits [1..0]
214// - the status of byte (a+1) is held in bits [3..2]
215// - the status of byte (a+2) is held in bits [5..4]
216// - the status of byte (a+3) is held in bits [7..6]
217//
218// It's "inverse" because endianness normally describes a mapping from
219// value bits to memory addresses;  in this case the mapping is inverted.
220// Ie. instead of particular value bits being held in certain addresses, in
221// this case certain addresses are represented by particular value bits.
222// See insert_vabits2_into_vabits8() for an example.
223//
224// But note that we don't compress the V bits stored in registers;  they
225// need to be explicit to made the shadow operations possible.  Therefore
226// when moving values between registers and memory we need to convert
227// between the expanded in-register format and the compressed in-memory
228// format.  This isn't so difficult, it just requires careful attention in a
229// few places.
230
231// These represent eight bits of memory.
232#define VA_BITS2_NOACCESS     0x0      // 00b
233#define VA_BITS2_UNDEFINED    0x1      // 01b
234#define VA_BITS2_DEFINED      0x2      // 10b
235#define VA_BITS2_PARTDEFINED  0x3      // 11b
236
237// These represent 16 bits of memory.
238#define VA_BITS4_NOACCESS     0x0      // 00_00b
239#define VA_BITS4_UNDEFINED    0x5      // 01_01b
240#define VA_BITS4_DEFINED      0xa      // 10_10b
241
242// These represent 32 bits of memory.
243#define VA_BITS8_NOACCESS     0x00     // 00_00_00_00b
244#define VA_BITS8_UNDEFINED    0x55     // 01_01_01_01b
245#define VA_BITS8_DEFINED      0xaa     // 10_10_10_10b
246
247// These represent 64 bits of memory.
248#define VA_BITS16_NOACCESS    0x0000   // 00_00_00_00b x 2
249#define VA_BITS16_UNDEFINED   0x5555   // 01_01_01_01b x 2
250#define VA_BITS16_DEFINED     0xaaaa   // 10_10_10_10b x 2
251
252
253#define SM_CHUNKS             16384
254#define SM_OFF(aaa)           (((aaa) & 0xffff) >> 2)
255#define SM_OFF_16(aaa)        (((aaa) & 0xffff) >> 3)
256
257// Paranoia:  it's critical for performance that the requested inlining
258// occurs.  So try extra hard.
259#define INLINE    inline __attribute__((always_inline))
260
261static INLINE Addr start_of_this_sm ( Addr a ) {
262   return (a & (~SM_MASK));
263}
264static INLINE Bool is_start_of_sm ( Addr a ) {
265   return (start_of_this_sm(a) == a);
266}
267
268typedef
269   struct {
270      UChar vabits8[SM_CHUNKS];
271   }
272   SecMap;
273
274// 3 distinguished secondary maps, one for no-access, one for
275// accessible but undefined, and one for accessible and defined.
276// Distinguished secondaries may never be modified.
277#define SM_DIST_NOACCESS   0
278#define SM_DIST_UNDEFINED  1
279#define SM_DIST_DEFINED    2
280
281static SecMap sm_distinguished[3];
282
283static INLINE Bool is_distinguished_sm ( SecMap* sm ) {
284   return sm >= &sm_distinguished[0] && sm <= &sm_distinguished[2];
285}
286
287// Forward declaration
288static void update_SM_counts(SecMap* oldSM, SecMap* newSM);
289
290/* dist_sm points to one of our three distinguished secondaries.  Make
291   a copy of it so that we can write to it.
292*/
293static SecMap* copy_for_writing ( SecMap* dist_sm )
294{
295   SecMap* new_sm;
296   tl_assert(dist_sm == &sm_distinguished[0]
297          || dist_sm == &sm_distinguished[1]
298          || dist_sm == &sm_distinguished[2]);
299
300   new_sm = VG_(am_shadow_alloc)(sizeof(SecMap));
301   if (new_sm == NULL)
302      VG_(out_of_memory_NORETURN)( "memcheck:allocate new SecMap",
303                                   sizeof(SecMap) );
304   VG_(memcpy)(new_sm, dist_sm, sizeof(SecMap));
305   update_SM_counts(dist_sm, new_sm);
306   return new_sm;
307}
308
309/* --------------- Stats --------------- */
310
311static Int   n_issued_SMs      = 0;
312static Int   n_deissued_SMs    = 0;
313static Int   n_noaccess_SMs    = N_PRIMARY_MAP; // start with many noaccess DSMs
314static Int   n_undefined_SMs   = 0;
315static Int   n_defined_SMs     = 0;
316static Int   n_non_DSM_SMs     = 0;
317static Int   max_noaccess_SMs  = 0;
318static Int   max_undefined_SMs = 0;
319static Int   max_defined_SMs   = 0;
320static Int   max_non_DSM_SMs   = 0;
321
322/* # searches initiated in auxmap_L1, and # base cmps required */
323static ULong n_auxmap_L1_searches  = 0;
324static ULong n_auxmap_L1_cmps      = 0;
325/* # of searches that missed in auxmap_L1 and therefore had to
326   be handed to auxmap_L2. And the number of nodes inserted. */
327static ULong n_auxmap_L2_searches  = 0;
328static ULong n_auxmap_L2_nodes     = 0;
329
330static Int   n_sanity_cheap     = 0;
331static Int   n_sanity_expensive = 0;
332
333static Int   n_secVBit_nodes   = 0;
334static Int   max_secVBit_nodes = 0;
335
336static void update_SM_counts(SecMap* oldSM, SecMap* newSM)
337{
338   if      (oldSM == &sm_distinguished[SM_DIST_NOACCESS ]) n_noaccess_SMs --;
339   else if (oldSM == &sm_distinguished[SM_DIST_UNDEFINED]) n_undefined_SMs--;
340   else if (oldSM == &sm_distinguished[SM_DIST_DEFINED  ]) n_defined_SMs  --;
341   else                                                  { n_non_DSM_SMs  --;
342                                                           n_deissued_SMs ++; }
343
344   if      (newSM == &sm_distinguished[SM_DIST_NOACCESS ]) n_noaccess_SMs ++;
345   else if (newSM == &sm_distinguished[SM_DIST_UNDEFINED]) n_undefined_SMs++;
346   else if (newSM == &sm_distinguished[SM_DIST_DEFINED  ]) n_defined_SMs  ++;
347   else                                                  { n_non_DSM_SMs  ++;
348                                                           n_issued_SMs   ++; }
349
350   if (n_noaccess_SMs  > max_noaccess_SMs ) max_noaccess_SMs  = n_noaccess_SMs;
351   if (n_undefined_SMs > max_undefined_SMs) max_undefined_SMs = n_undefined_SMs;
352   if (n_defined_SMs   > max_defined_SMs  ) max_defined_SMs   = n_defined_SMs;
353   if (n_non_DSM_SMs   > max_non_DSM_SMs  ) max_non_DSM_SMs   = n_non_DSM_SMs;
354}
355
356/* --------------- Primary maps --------------- */
357
358/* The main primary map.  This covers some initial part of the address
359   space, addresses 0 .. (N_PRIMARY_MAP << 16)-1.  The rest of it is
360   handled using the auxiliary primary map.
361*/
362static SecMap* primary_map[N_PRIMARY_MAP];
363
364
365/* An entry in the auxiliary primary map.  base must be a 64k-aligned
366   value, and sm points at the relevant secondary map.  As with the
367   main primary map, the secondary may be either a real secondary, or
368   one of the three distinguished secondaries.  DO NOT CHANGE THIS
369   LAYOUT: the first word has to be the key for OSet fast lookups.
370*/
371typedef
372   struct {
373      Addr    base;
374      SecMap* sm;
375   }
376   AuxMapEnt;
377
378/* Tunable parameter: How big is the L1 queue? */
379#define N_AUXMAP_L1 24
380
381/* Tunable parameter: How far along the L1 queue to insert
382   entries resulting from L2 lookups? */
383#define AUXMAP_L1_INSERT_IX 12
384
385static struct {
386          Addr       base;
387          AuxMapEnt* ent; // pointer to the matching auxmap_L2 node
388       }
389       auxmap_L1[N_AUXMAP_L1];
390
391static OSet* auxmap_L2 = NULL;
392
393static void init_auxmap_L1_L2 ( void )
394{
395   Int i;
396   for (i = 0; i < N_AUXMAP_L1; i++) {
397      auxmap_L1[i].base = 0;
398      auxmap_L1[i].ent  = NULL;
399   }
400
401   tl_assert(0 == offsetof(AuxMapEnt,base));
402   tl_assert(sizeof(Addr) == sizeof(void*));
403   auxmap_L2 = VG_(OSetGen_Create)( /*keyOff*/  offsetof(AuxMapEnt,base),
404                                    /*fastCmp*/ NULL,
405                                    VG_(malloc), "mc.iaLL.1", VG_(free) );
406}
407
408/* Check representation invariants; if OK return NULL; else a
409   descriptive bit of text.  Also return the number of
410   non-distinguished secondary maps referred to from the auxiliary
411   primary maps. */
412
413static HChar* check_auxmap_L1_L2_sanity ( Word* n_secmaps_found )
414{
415   Word i, j;
416   /* On a 32-bit platform, the L2 and L1 tables should
417      both remain empty forever.
418
419      On a 64-bit platform:
420      In the L2 table:
421       all .base & 0xFFFF == 0
422       all .base > MAX_PRIMARY_ADDRESS
423      In the L1 table:
424       all .base & 0xFFFF == 0
425       all (.base > MAX_PRIMARY_ADDRESS
426            .base & 0xFFFF == 0
427            and .ent points to an AuxMapEnt with the same .base)
428           or
429           (.base == 0 and .ent == NULL)
430   */
431   *n_secmaps_found = 0;
432   if (sizeof(void*) == 4) {
433      /* 32-bit platform */
434      if (VG_(OSetGen_Size)(auxmap_L2) != 0)
435         return "32-bit: auxmap_L2 is non-empty";
436      for (i = 0; i < N_AUXMAP_L1; i++)
437        if (auxmap_L1[i].base != 0 || auxmap_L1[i].ent != NULL)
438      return "32-bit: auxmap_L1 is non-empty";
439   } else {
440      /* 64-bit platform */
441      UWord elems_seen = 0;
442      AuxMapEnt *elem, *res;
443      AuxMapEnt key;
444      /* L2 table */
445      VG_(OSetGen_ResetIter)(auxmap_L2);
446      while ( (elem = VG_(OSetGen_Next)(auxmap_L2)) ) {
447         elems_seen++;
448         if (0 != (elem->base & (Addr)0xFFFF))
449            return "64-bit: nonzero .base & 0xFFFF in auxmap_L2";
450         if (elem->base <= MAX_PRIMARY_ADDRESS)
451            return "64-bit: .base <= MAX_PRIMARY_ADDRESS in auxmap_L2";
452         if (elem->sm == NULL)
453            return "64-bit: .sm in _L2 is NULL";
454         if (!is_distinguished_sm(elem->sm))
455            (*n_secmaps_found)++;
456      }
457      if (elems_seen != n_auxmap_L2_nodes)
458         return "64-bit: disagreement on number of elems in _L2";
459      /* Check L1-L2 correspondence */
460      for (i = 0; i < N_AUXMAP_L1; i++) {
461         if (auxmap_L1[i].base == 0 && auxmap_L1[i].ent == NULL)
462            continue;
463         if (0 != (auxmap_L1[i].base & (Addr)0xFFFF))
464            return "64-bit: nonzero .base & 0xFFFF in auxmap_L1";
465         if (auxmap_L1[i].base <= MAX_PRIMARY_ADDRESS)
466            return "64-bit: .base <= MAX_PRIMARY_ADDRESS in auxmap_L1";
467         if (auxmap_L1[i].ent == NULL)
468            return "64-bit: .ent is NULL in auxmap_L1";
469         if (auxmap_L1[i].ent->base != auxmap_L1[i].base)
470            return "64-bit: _L1 and _L2 bases are inconsistent";
471         /* Look it up in auxmap_L2. */
472         key.base = auxmap_L1[i].base;
473         key.sm   = 0;
474         res = VG_(OSetGen_Lookup)(auxmap_L2, &key);
475         if (res == NULL)
476            return "64-bit: _L1 .base not found in _L2";
477         if (res != auxmap_L1[i].ent)
478            return "64-bit: _L1 .ent disagrees with _L2 entry";
479      }
480      /* Check L1 contains no duplicates */
481      for (i = 0; i < N_AUXMAP_L1; i++) {
482         if (auxmap_L1[i].base == 0)
483            continue;
484	 for (j = i+1; j < N_AUXMAP_L1; j++) {
485            if (auxmap_L1[j].base == 0)
486               continue;
487            if (auxmap_L1[j].base == auxmap_L1[i].base)
488               return "64-bit: duplicate _L1 .base entries";
489         }
490      }
491   }
492   return NULL; /* ok */
493}
494
495static void insert_into_auxmap_L1_at ( Word rank, AuxMapEnt* ent )
496{
497   Word i;
498   tl_assert(ent);
499   tl_assert(rank >= 0 && rank < N_AUXMAP_L1);
500   for (i = N_AUXMAP_L1-1; i > rank; i--)
501      auxmap_L1[i] = auxmap_L1[i-1];
502   auxmap_L1[rank].base = ent->base;
503   auxmap_L1[rank].ent  = ent;
504}
505
506static INLINE AuxMapEnt* maybe_find_in_auxmap ( Addr a )
507{
508   AuxMapEnt  key;
509   AuxMapEnt* res;
510   Word       i;
511
512   tl_assert(a > MAX_PRIMARY_ADDRESS);
513   a &= ~(Addr)0xFFFF;
514
515   /* First search the front-cache, which is a self-organising
516      list containing the most popular entries. */
517
518   if (LIKELY(auxmap_L1[0].base == a))
519      return auxmap_L1[0].ent;
520   if (LIKELY(auxmap_L1[1].base == a)) {
521      Addr       t_base = auxmap_L1[0].base;
522      AuxMapEnt* t_ent  = auxmap_L1[0].ent;
523      auxmap_L1[0].base = auxmap_L1[1].base;
524      auxmap_L1[0].ent  = auxmap_L1[1].ent;
525      auxmap_L1[1].base = t_base;
526      auxmap_L1[1].ent  = t_ent;
527      return auxmap_L1[0].ent;
528   }
529
530   n_auxmap_L1_searches++;
531
532   for (i = 0; i < N_AUXMAP_L1; i++) {
533      if (auxmap_L1[i].base == a) {
534         break;
535      }
536   }
537   tl_assert(i >= 0 && i <= N_AUXMAP_L1);
538
539   n_auxmap_L1_cmps += (ULong)(i+1);
540
541   if (i < N_AUXMAP_L1) {
542      if (i > 0) {
543         Addr       t_base = auxmap_L1[i-1].base;
544         AuxMapEnt* t_ent  = auxmap_L1[i-1].ent;
545         auxmap_L1[i-1].base = auxmap_L1[i-0].base;
546         auxmap_L1[i-1].ent  = auxmap_L1[i-0].ent;
547         auxmap_L1[i-0].base = t_base;
548         auxmap_L1[i-0].ent  = t_ent;
549         i--;
550      }
551      return auxmap_L1[i].ent;
552   }
553
554   n_auxmap_L2_searches++;
555
556   /* First see if we already have it. */
557   key.base = a;
558   key.sm   = 0;
559
560   res = VG_(OSetGen_Lookup)(auxmap_L2, &key);
561   if (res)
562      insert_into_auxmap_L1_at( AUXMAP_L1_INSERT_IX, res );
563   return res;
564}
565
566static AuxMapEnt* find_or_alloc_in_auxmap ( Addr a )
567{
568   AuxMapEnt *nyu, *res;
569
570   /* First see if we already have it. */
571   res = maybe_find_in_auxmap( a );
572   if (LIKELY(res))
573      return res;
574
575   /* Ok, there's no entry in the secondary map, so we'll have
576      to allocate one. */
577   a &= ~(Addr)0xFFFF;
578
579   nyu = (AuxMapEnt*) VG_(OSetGen_AllocNode)( auxmap_L2, sizeof(AuxMapEnt) );
580   tl_assert(nyu);
581   nyu->base = a;
582   nyu->sm   = &sm_distinguished[SM_DIST_NOACCESS];
583   VG_(OSetGen_Insert)( auxmap_L2, nyu );
584   insert_into_auxmap_L1_at( AUXMAP_L1_INSERT_IX, nyu );
585   n_auxmap_L2_nodes++;
586   return nyu;
587}
588
589/* --------------- SecMap fundamentals --------------- */
590
591// In all these, 'low' means it's definitely in the main primary map,
592// 'high' means it's definitely in the auxiliary table.
593
594static INLINE SecMap** get_secmap_low_ptr ( Addr a )
595{
596   UWord pm_off = a >> 16;
597#  if VG_DEBUG_MEMORY >= 1
598   tl_assert(pm_off < N_PRIMARY_MAP);
599#  endif
600   return &primary_map[ pm_off ];
601}
602
603static INLINE SecMap** get_secmap_high_ptr ( Addr a )
604{
605   AuxMapEnt* am = find_or_alloc_in_auxmap(a);
606   return &am->sm;
607}
608
609static SecMap** get_secmap_ptr ( Addr a )
610{
611   return ( a <= MAX_PRIMARY_ADDRESS
612          ? get_secmap_low_ptr(a)
613          : get_secmap_high_ptr(a));
614}
615
616static INLINE SecMap* get_secmap_for_reading_low ( Addr a )
617{
618   return *get_secmap_low_ptr(a);
619}
620
621static INLINE SecMap* get_secmap_for_reading_high ( Addr a )
622{
623   return *get_secmap_high_ptr(a);
624}
625
626static INLINE SecMap* get_secmap_for_writing_low(Addr a)
627{
628   SecMap** p = get_secmap_low_ptr(a);
629   if (UNLIKELY(is_distinguished_sm(*p)))
630      *p = copy_for_writing(*p);
631   return *p;
632}
633
634static INLINE SecMap* get_secmap_for_writing_high ( Addr a )
635{
636   SecMap** p = get_secmap_high_ptr(a);
637   if (UNLIKELY(is_distinguished_sm(*p)))
638      *p = copy_for_writing(*p);
639   return *p;
640}
641
642/* Produce the secmap for 'a', either from the primary map or by
643   ensuring there is an entry for it in the aux primary map.  The
644   secmap may be a distinguished one as the caller will only want to
645   be able to read it.
646*/
647static INLINE SecMap* get_secmap_for_reading ( Addr a )
648{
649   return ( a <= MAX_PRIMARY_ADDRESS
650          ? get_secmap_for_reading_low (a)
651          : get_secmap_for_reading_high(a) );
652}
653
654/* Produce the secmap for 'a', either from the primary map or by
655   ensuring there is an entry for it in the aux primary map.  The
656   secmap may not be a distinguished one, since the caller will want
657   to be able to write it.  If it is a distinguished secondary, make a
658   writable copy of it, install it, and return the copy instead.  (COW
659   semantics).
660*/
661static SecMap* get_secmap_for_writing ( Addr a )
662{
663   return ( a <= MAX_PRIMARY_ADDRESS
664          ? get_secmap_for_writing_low (a)
665          : get_secmap_for_writing_high(a) );
666}
667
668/* If 'a' has a SecMap, produce it.  Else produce NULL.  But don't
669   allocate one if one doesn't already exist.  This is used by the
670   leak checker.
671*/
672static SecMap* maybe_get_secmap_for ( Addr a )
673{
674   if (a <= MAX_PRIMARY_ADDRESS) {
675      return get_secmap_for_reading_low(a);
676   } else {
677      AuxMapEnt* am = maybe_find_in_auxmap(a);
678      return am ? am->sm : NULL;
679   }
680}
681
682/* --------------- Fundamental functions --------------- */
683
684static INLINE
685void insert_vabits2_into_vabits8 ( Addr a, UChar vabits2, UChar* vabits8 )
686{
687   UInt shift =  (a & 3)  << 1;        // shift by 0, 2, 4, or 6
688   *vabits8  &= ~(0x3     << shift);   // mask out the two old bits
689   *vabits8  |=  (vabits2 << shift);   // mask  in the two new bits
690}
691
692static INLINE
693void insert_vabits4_into_vabits8 ( Addr a, UChar vabits4, UChar* vabits8 )
694{
695   UInt shift;
696   tl_assert(VG_IS_2_ALIGNED(a));      // Must be 2-aligned
697   shift     =  (a & 2)   << 1;        // shift by 0 or 4
698   *vabits8 &= ~(0xf      << shift);   // mask out the four old bits
699   *vabits8 |=  (vabits4 << shift);    // mask  in the four new bits
700}
701
702static INLINE
703UChar extract_vabits2_from_vabits8 ( Addr a, UChar vabits8 )
704{
705   UInt shift = (a & 3) << 1;          // shift by 0, 2, 4, or 6
706   vabits8 >>= shift;                  // shift the two bits to the bottom
707   return 0x3 & vabits8;               // mask out the rest
708}
709
710static INLINE
711UChar extract_vabits4_from_vabits8 ( Addr a, UChar vabits8 )
712{
713   UInt shift;
714   tl_assert(VG_IS_2_ALIGNED(a));      // Must be 2-aligned
715   shift = (a & 2) << 1;               // shift by 0 or 4
716   vabits8 >>= shift;                  // shift the four bits to the bottom
717   return 0xf & vabits8;               // mask out the rest
718}
719
720// Note that these four are only used in slow cases.  The fast cases do
721// clever things like combine the auxmap check (in
722// get_secmap_{read,writ}able) with alignment checks.
723
724// *** WARNING! ***
725// Any time this function is called, if it is possible that vabits2
726// is equal to VA_BITS2_PARTDEFINED, then the corresponding entry in the
727// sec-V-bits table must also be set!
728static INLINE
729void set_vabits2 ( Addr a, UChar vabits2 )
730{
731   SecMap* sm       = get_secmap_for_writing(a);
732   UWord   sm_off   = SM_OFF(a);
733   insert_vabits2_into_vabits8( a, vabits2, &(sm->vabits8[sm_off]) );
734}
735
736static INLINE
737UChar get_vabits2 ( Addr a )
738{
739   SecMap* sm       = get_secmap_for_reading(a);
740   UWord   sm_off   = SM_OFF(a);
741   UChar   vabits8  = sm->vabits8[sm_off];
742   return extract_vabits2_from_vabits8(a, vabits8);
743}
744
745// *** WARNING! ***
746// Any time this function is called, if it is possible that any of the
747// 4 2-bit fields in vabits8 are equal to VA_BITS2_PARTDEFINED, then the
748// corresponding entry(s) in the sec-V-bits table must also be set!
749static INLINE
750UChar get_vabits8_for_aligned_word32 ( Addr a )
751{
752   SecMap* sm       = get_secmap_for_reading(a);
753   UWord   sm_off   = SM_OFF(a);
754   UChar   vabits8  = sm->vabits8[sm_off];
755   return vabits8;
756}
757
758static INLINE
759void set_vabits8_for_aligned_word32 ( Addr a, UChar vabits8 )
760{
761   SecMap* sm       = get_secmap_for_writing(a);
762   UWord   sm_off   = SM_OFF(a);
763   sm->vabits8[sm_off] = vabits8;
764}
765
766
767// Forward declarations
768static UWord get_sec_vbits8(Addr a);
769static void  set_sec_vbits8(Addr a, UWord vbits8);
770
771// Returns False if there was an addressability error.
772static INLINE
773Bool set_vbits8 ( Addr a, UChar vbits8 )
774{
775   Bool  ok      = True;
776   UChar vabits2 = get_vabits2(a);
777   if ( VA_BITS2_NOACCESS != vabits2 ) {
778      // Addressable.  Convert in-register format to in-memory format.
779      // Also remove any existing sec V bit entry for the byte if no
780      // longer necessary.
781      if      ( V_BITS8_DEFINED   == vbits8 ) { vabits2 = VA_BITS2_DEFINED;   }
782      else if ( V_BITS8_UNDEFINED == vbits8 ) { vabits2 = VA_BITS2_UNDEFINED; }
783      else                                    { vabits2 = VA_BITS2_PARTDEFINED;
784                                                set_sec_vbits8(a, vbits8);  }
785      set_vabits2(a, vabits2);
786
787   } else {
788      // Unaddressable!  Do nothing -- when writing to unaddressable
789      // memory it acts as a black hole, and the V bits can never be seen
790      // again.  So we don't have to write them at all.
791      ok = False;
792   }
793   return ok;
794}
795
796// Returns False if there was an addressability error.  In that case, we put
797// all defined bits into vbits8.
798static INLINE
799Bool get_vbits8 ( Addr a, UChar* vbits8 )
800{
801   Bool  ok      = True;
802   UChar vabits2 = get_vabits2(a);
803
804   // Convert the in-memory format to in-register format.
805   if      ( VA_BITS2_DEFINED   == vabits2 ) { *vbits8 = V_BITS8_DEFINED;   }
806   else if ( VA_BITS2_UNDEFINED == vabits2 ) { *vbits8 = V_BITS8_UNDEFINED; }
807   else if ( VA_BITS2_NOACCESS  == vabits2 ) {
808      *vbits8 = V_BITS8_DEFINED;    // Make V bits defined!
809      ok = False;
810   } else {
811      tl_assert( VA_BITS2_PARTDEFINED == vabits2 );
812      *vbits8 = get_sec_vbits8(a);
813   }
814   return ok;
815}
816
817
818/* --------------- Secondary V bit table ------------ */
819
820// This table holds the full V bit pattern for partially-defined bytes
821// (PDBs) that are represented by VA_BITS2_PARTDEFINED in the main shadow
822// memory.
823//
824// Note: the nodes in this table can become stale.  Eg. if you write a PDB,
825// then overwrite the same address with a fully defined byte, the sec-V-bit
826// node will not necessarily be removed.  This is because checking for
827// whether removal is necessary would slow down the fast paths.
828//
829// To avoid the stale nodes building up too much, we periodically (once the
830// table reaches a certain size) garbage collect (GC) the table by
831// traversing it and evicting any nodes not having PDB.
832// If more than a certain proportion of nodes survived, we increase the
833// table size so that GCs occur less often.
834//
835// This policy is designed to avoid bad table bloat in the worst case where
836// a program creates huge numbers of stale PDBs -- we would get this bloat
837// if we had no GC -- while handling well the case where a node becomes
838// stale but shortly afterwards is rewritten with a PDB and so becomes
839// non-stale again (which happens quite often, eg. in perf/bz2).  If we just
840// remove all stale nodes as soon as possible, we just end up re-adding a
841// lot of them in later again.  The "sufficiently stale" approach avoids
842// this.  (If a program has many live PDBs, performance will just suck,
843// there's no way around that.)
844//
845// Further comments, JRS 14 Feb 2012.  It turns out that the policy of
846// holding on to stale entries for 2 GCs before discarding them can lead
847// to massive space leaks.  So we're changing to an arrangement where
848// lines are evicted as soon as they are observed to be stale during a
849// GC.  This also has a side benefit of allowing the sufficiently_stale
850// field to be removed from the SecVBitNode struct, reducing its size by
851// 8 bytes, which is a substantial space saving considering that the
852// struct was previously 32 or so bytes, on a 64 bit target.
853//
854// In order to try and mitigate the problem that the "sufficiently stale"
855// heuristic was designed to avoid, the table size is allowed to drift
856// up ("DRIFTUP") slowly to 80000, even if the residency is low.  This
857// means that nodes will exist in the table longer on average, and hopefully
858// will be deleted and re-added less frequently.
859//
860// The previous scaling up mechanism (now called STEPUP) is retained:
861// if residency exceeds 50%, the table is scaled up, although by a
862// factor sqrt(2) rather than 2 as before.  This effectively doubles the
863// frequency of GCs when there are many PDBs at reduces the tendency of
864// stale PDBs to reside for long periods in the table.
865
866static OSet* secVBitTable;
867
868// Stats
869static ULong sec_vbits_new_nodes = 0;
870static ULong sec_vbits_updates   = 0;
871
872// This must be a power of two;  this is checked in mc_pre_clo_init().
873// The size chosen here is a trade-off:  if the nodes are bigger (ie. cover
874// a larger address range) they take more space but we can get multiple
875// partially-defined bytes in one if they are close to each other, reducing
876// the number of total nodes.  In practice sometimes they are clustered (eg.
877// perf/bz2 repeatedly writes then reads more than 20,000 in a contiguous
878// row), but often not.  So we choose something intermediate.
879#define BYTES_PER_SEC_VBIT_NODE     16
880
881// We make the table bigger by a factor of STEPUP_GROWTH_FACTOR if
882// more than this many nodes survive a GC.
883#define STEPUP_SURVIVOR_PROPORTION  0.5
884#define STEPUP_GROWTH_FACTOR        1.414213562
885
886// If the above heuristic doesn't apply, then we may make the table
887// slightly bigger, by a factor of DRIFTUP_GROWTH_FACTOR, if more than
888// this many nodes survive a GC, _and_ the total table size does
889// not exceed a fixed limit.  The numbers are somewhat arbitrary, but
890// work tolerably well on long Firefox runs.  The scaleup ratio of 1.5%
891// effectively although gradually reduces residency and increases time
892// between GCs for programs with small numbers of PDBs.  The 80000 limit
893// effectively limits the table size to around 2MB for programs with
894// small numbers of PDBs, whilst giving a reasonably long lifetime to
895// entries, to try and reduce the costs resulting from deleting and
896// re-adding of entries.
897#define DRIFTUP_SURVIVOR_PROPORTION 0.15
898#define DRIFTUP_GROWTH_FACTOR       1.015
899#define DRIFTUP_MAX_SIZE            80000
900
901// We GC the table when it gets this many nodes in it, ie. it's effectively
902// the table size.  It can change.
903static Int  secVBitLimit = 1000;
904
905// The number of GCs done, used to age sec-V-bit nodes for eviction.
906// Because it's unsigned, wrapping doesn't matter -- the right answer will
907// come out anyway.
908static UInt GCs_done = 0;
909
910typedef
911   struct {
912      Addr  a;
913      UChar vbits8[BYTES_PER_SEC_VBIT_NODE];
914   }
915   SecVBitNode;
916
917static OSet* createSecVBitTable(void)
918{
919   OSet* newSecVBitTable;
920   newSecVBitTable = VG_(OSetGen_Create_With_Pool)
921      ( offsetof(SecVBitNode, a),
922        NULL, // use fast comparisons
923        VG_(malloc), "mc.cSVT.1 (sec VBit table)",
924        VG_(free),
925        1000,
926        sizeof(SecVBitNode));
927   return newSecVBitTable;
928}
929
930static void gcSecVBitTable(void)
931{
932   OSet*        secVBitTable2;
933   SecVBitNode* n;
934   Int          i, n_nodes = 0, n_survivors = 0;
935
936   GCs_done++;
937
938   // Create the new table.
939   secVBitTable2 = createSecVBitTable();
940
941   // Traverse the table, moving fresh nodes into the new table.
942   VG_(OSetGen_ResetIter)(secVBitTable);
943   while ( (n = VG_(OSetGen_Next)(secVBitTable)) ) {
944      // Keep node if any of its bytes are non-stale.  Using
945      // get_vabits2() for the lookup is not very efficient, but I don't
946      // think it matters.
947      for (i = 0; i < BYTES_PER_SEC_VBIT_NODE; i++) {
948         if (VA_BITS2_PARTDEFINED == get_vabits2(n->a + i)) {
949            // Found a non-stale byte, so keep =>
950            // Insert a copy of the node into the new table.
951            SecVBitNode* n2 =
952               VG_(OSetGen_AllocNode)(secVBitTable2, sizeof(SecVBitNode));
953            *n2 = *n;
954            VG_(OSetGen_Insert)(secVBitTable2, n2);
955            break;
956         }
957      }
958   }
959
960   // Get the before and after sizes.
961   n_nodes     = VG_(OSetGen_Size)(secVBitTable);
962   n_survivors = VG_(OSetGen_Size)(secVBitTable2);
963
964   // Destroy the old table, and put the new one in its place.
965   VG_(OSetGen_Destroy)(secVBitTable);
966   secVBitTable = secVBitTable2;
967
968   if (VG_(clo_verbosity) > 1) {
969      Char percbuf[7];
970      VG_(percentify)(n_survivors, n_nodes, 1, 6, percbuf);
971      VG_(message)(Vg_DebugMsg, "memcheck GC: %d nodes, %d survivors (%s)\n",
972                   n_nodes, n_survivors, percbuf);
973   }
974
975   // Increase table size if necessary.
976   if ((Double)n_survivors
977       > ((Double)secVBitLimit * STEPUP_SURVIVOR_PROPORTION)) {
978      secVBitLimit = (Int)((Double)secVBitLimit * (Double)STEPUP_GROWTH_FACTOR);
979      if (VG_(clo_verbosity) > 1)
980         VG_(message)(Vg_DebugMsg,
981                      "memcheck GC: %d new table size (stepup)\n",
982                      secVBitLimit);
983   }
984   else
985   if (secVBitLimit < DRIFTUP_MAX_SIZE
986       && (Double)n_survivors
987          > ((Double)secVBitLimit * DRIFTUP_SURVIVOR_PROPORTION)) {
988      secVBitLimit = (Int)((Double)secVBitLimit * (Double)DRIFTUP_GROWTH_FACTOR);
989      if (VG_(clo_verbosity) > 1)
990         VG_(message)(Vg_DebugMsg,
991                      "memcheck GC: %d new table size (driftup)\n",
992                      secVBitLimit);
993   }
994}
995
996static UWord get_sec_vbits8(Addr a)
997{
998   Addr         aAligned = VG_ROUNDDN(a, BYTES_PER_SEC_VBIT_NODE);
999   Int          amod     = a % BYTES_PER_SEC_VBIT_NODE;
1000   SecVBitNode* n        = VG_(OSetGen_Lookup)(secVBitTable, &aAligned);
1001   UChar        vbits8;
1002   tl_assert2(n, "get_sec_vbits8: no node for address %p (%p)\n", aAligned, a);
1003   // Shouldn't be fully defined or fully undefined -- those cases shouldn't
1004   // make it to the secondary V bits table.
1005   vbits8 = n->vbits8[amod];
1006   tl_assert(V_BITS8_DEFINED != vbits8 && V_BITS8_UNDEFINED != vbits8);
1007   return vbits8;
1008}
1009
1010static void set_sec_vbits8(Addr a, UWord vbits8)
1011{
1012   Addr         aAligned = VG_ROUNDDN(a, BYTES_PER_SEC_VBIT_NODE);
1013   Int          i, amod  = a % BYTES_PER_SEC_VBIT_NODE;
1014   SecVBitNode* n        = VG_(OSetGen_Lookup)(secVBitTable, &aAligned);
1015   // Shouldn't be fully defined or fully undefined -- those cases shouldn't
1016   // make it to the secondary V bits table.
1017   tl_assert(V_BITS8_DEFINED != vbits8 && V_BITS8_UNDEFINED != vbits8);
1018   if (n) {
1019      n->vbits8[amod] = vbits8;     // update
1020      sec_vbits_updates++;
1021   } else {
1022      // Do a table GC if necessary.  Nb: do this before creating and
1023      // inserting the new node, to avoid erroneously GC'ing the new node.
1024      if (secVBitLimit == VG_(OSetGen_Size)(secVBitTable)) {
1025         gcSecVBitTable();
1026      }
1027
1028      // New node:  assign the specific byte, make the rest invalid (they
1029      // should never be read as-is, but be cautious).
1030      n = VG_(OSetGen_AllocNode)(secVBitTable, sizeof(SecVBitNode));
1031      n->a            = aAligned;
1032      for (i = 0; i < BYTES_PER_SEC_VBIT_NODE; i++) {
1033         n->vbits8[i] = V_BITS8_UNDEFINED;
1034      }
1035      n->vbits8[amod] = vbits8;
1036
1037      // Insert the new node.
1038      VG_(OSetGen_Insert)(secVBitTable, n);
1039      sec_vbits_new_nodes++;
1040
1041      n_secVBit_nodes = VG_(OSetGen_Size)(secVBitTable);
1042      if (n_secVBit_nodes > max_secVBit_nodes)
1043         max_secVBit_nodes = n_secVBit_nodes;
1044   }
1045}
1046
1047/* --------------- Endianness helpers --------------- */
1048
1049/* Returns the offset in memory of the byteno-th most significant byte
1050   in a wordszB-sized word, given the specified endianness. */
1051static INLINE UWord byte_offset_w ( UWord wordszB, Bool bigendian,
1052                                    UWord byteno ) {
1053   return bigendian ? (wordszB-1-byteno) : byteno;
1054}
1055
1056
1057/* --------------- Ignored address ranges --------------- */
1058
1059#define M_IGNORE_RANGES 4
1060
1061typedef
1062   struct {
1063      Int  used;
1064      Addr start[M_IGNORE_RANGES];
1065      Addr end[M_IGNORE_RANGES];
1066   }
1067   IgnoreRanges;
1068
1069static IgnoreRanges ignoreRanges;
1070
1071INLINE Bool MC_(in_ignored_range) ( Addr a )
1072{
1073   Int i;
1074   if (LIKELY(ignoreRanges.used == 0))
1075      return False;
1076   for (i = 0; i < ignoreRanges.used; i++) {
1077      if (a >= ignoreRanges.start[i] && a < ignoreRanges.end[i])
1078         return True;
1079   }
1080   return False;
1081}
1082
1083/* Parse two Addr separated by a dash, or fail. */
1084
1085static Bool parse_range ( UChar** ppc, Addr* result1, Addr* result2 )
1086{
1087   Bool ok = VG_(parse_Addr) (ppc, result1);
1088   if (!ok)
1089      return False;
1090   if (**ppc != '-')
1091      return False;
1092   (*ppc)++;
1093   ok = VG_(parse_Addr) (ppc, result2);
1094   if (!ok)
1095      return False;
1096   return True;
1097}
1098
1099/* Parse a set of ranges separated by commas into 'ignoreRanges', or
1100   fail. */
1101
1102static Bool parse_ignore_ranges ( UChar* str0 )
1103{
1104   Addr start, end;
1105   Bool ok;
1106   UChar*  str = str0;
1107   UChar** ppc = &str;
1108   ignoreRanges.used = 0;
1109   while (1) {
1110      ok = parse_range(ppc, &start, &end);
1111      if (!ok)
1112         return False;
1113      if (ignoreRanges.used >= M_IGNORE_RANGES)
1114         return False;
1115      ignoreRanges.start[ignoreRanges.used] = start;
1116      ignoreRanges.end[ignoreRanges.used] = end;
1117      ignoreRanges.used++;
1118      if (**ppc == 0)
1119         return True;
1120      if (**ppc != ',')
1121         return False;
1122      (*ppc)++;
1123   }
1124   /*NOTREACHED*/
1125   return False;
1126}
1127
1128
1129/* --------------- Load/store slow cases. --------------- */
1130
1131static
1132__attribute__((noinline))
1133ULong mc_LOADVn_slow ( Addr a, SizeT nBits, Bool bigendian )
1134{
1135   PROF_EVENT(30, "mc_LOADVn_slow");
1136
1137   /* ------------ BEGIN semi-fast cases ------------ */
1138   /* These deal quickly-ish with the common auxiliary primary map
1139      cases on 64-bit platforms.  Are merely a speedup hack; can be
1140      omitted without loss of correctness/functionality.  Note that in
1141      both cases the "sizeof(void*) == 8" causes these cases to be
1142      folded out by compilers on 32-bit platforms.  These are derived
1143      from LOADV64 and LOADV32.
1144   */
1145   if (LIKELY(sizeof(void*) == 8
1146                      && nBits == 64 && VG_IS_8_ALIGNED(a))) {
1147      SecMap* sm       = get_secmap_for_reading(a);
1148      UWord   sm_off16 = SM_OFF_16(a);
1149      UWord   vabits16 = ((UShort*)(sm->vabits8))[sm_off16];
1150      if (LIKELY(vabits16 == VA_BITS16_DEFINED))
1151         return V_BITS64_DEFINED;
1152      if (LIKELY(vabits16 == VA_BITS16_UNDEFINED))
1153         return V_BITS64_UNDEFINED;
1154      /* else fall into the slow case */
1155   }
1156   if (LIKELY(sizeof(void*) == 8
1157                      && nBits == 32 && VG_IS_4_ALIGNED(a))) {
1158      SecMap* sm = get_secmap_for_reading(a);
1159      UWord sm_off = SM_OFF(a);
1160      UWord vabits8 = sm->vabits8[sm_off];
1161      if (LIKELY(vabits8 == VA_BITS8_DEFINED))
1162         return ((UWord)0xFFFFFFFF00000000ULL | (UWord)V_BITS32_DEFINED);
1163      if (LIKELY(vabits8 == VA_BITS8_UNDEFINED))
1164         return ((UWord)0xFFFFFFFF00000000ULL | (UWord)V_BITS32_UNDEFINED);
1165      /* else fall into slow case */
1166   }
1167   /* ------------ END semi-fast cases ------------ */
1168
1169   ULong  vbits64     = V_BITS64_UNDEFINED; /* result */
1170   ULong  pessim64    = V_BITS64_DEFINED;   /* only used when p-l-ok=yes */
1171   SSizeT szB         = nBits / 8;
1172   SSizeT i;          /* Must be signed. */
1173   SizeT  n_addrs_bad = 0;
1174   Addr   ai;
1175   UChar  vbits8;
1176   Bool   ok;
1177
1178   tl_assert(nBits == 64 || nBits == 32 || nBits == 16 || nBits == 8);
1179
1180   /* Make up a 64-bit result V word, which contains the loaded data
1181      for valid addresses and Defined for invalid addresses.  Iterate
1182      over the bytes in the word, from the most significant down to
1183      the least.  The vbits to return are calculated into vbits64.
1184      Also compute the pessimising value to be used when
1185      --partial-loads-ok=yes.  n_addrs_bad is redundant (the relevant
1186      info can be gleaned from pessim64) but is used as a
1187      cross-check. */
1188   for (i = szB-1; i >= 0; i--) {
1189      PROF_EVENT(31, "mc_LOADVn_slow(loop)");
1190      ai = a + byte_offset_w(szB, bigendian, i);
1191      ok = get_vbits8(ai, &vbits8);
1192      vbits64 <<= 8;
1193      vbits64 |= vbits8;
1194      if (!ok) n_addrs_bad++;
1195      pessim64 <<= 8;
1196      pessim64 |= (ok ? V_BITS8_DEFINED : V_BITS8_UNDEFINED);
1197   }
1198
1199   /* In the common case, all the addresses involved are valid, so we
1200      just return the computed V bits and have done. */
1201   if (LIKELY(n_addrs_bad == 0))
1202      return vbits64;
1203
1204   /* If there's no possibility of getting a partial-loads-ok
1205      exemption, report the error and quit. */
1206   if (!MC_(clo_partial_loads_ok)) {
1207      MC_(record_address_error)( VG_(get_running_tid)(), a, szB, False );
1208      return vbits64;
1209   }
1210
1211   /* The partial-loads-ok excemption might apply.  Find out if it
1212      does.  If so, don't report an addressing error, but do return
1213      Undefined for the bytes that are out of range, so as to avoid
1214      false negatives.  If it doesn't apply, just report an addressing
1215      error in the usual way. */
1216
1217   /* Some code steps along byte strings in aligned word-sized chunks
1218      even when there is only a partially defined word at the end (eg,
1219      optimised strlen).  This is allowed by the memory model of
1220      modern machines, since an aligned load cannot span two pages and
1221      thus cannot "partially fault".  Despite such behaviour being
1222      declared undefined by ANSI C/C++.
1223
1224      Therefore, a load from a partially-addressible place is allowed
1225      if all of the following hold:
1226      - the command-line flag is set [by default, it isn't]
1227      - it's a word-sized, word-aligned load
1228      - at least one of the addresses in the word *is* valid
1229
1230      Since this suppresses the addressing error, we avoid false
1231      negatives by marking bytes undefined when they come from an
1232      invalid address.
1233   */
1234
1235   /* "at least one of the addresses is invalid" */
1236   tl_assert(pessim64 != V_BITS64_DEFINED);
1237
1238   if (szB == VG_WORDSIZE && VG_IS_WORD_ALIGNED(a)
1239       && n_addrs_bad < VG_WORDSIZE) {
1240      /* Exemption applies.  Use the previously computed pessimising
1241         value for vbits64 and return the combined result, but don't
1242         flag an addressing error.  The pessimising value is Defined
1243         for valid addresses and Undefined for invalid addresses. */
1244      /* for assumption that doing bitwise or implements UifU */
1245      tl_assert(V_BIT_UNDEFINED == 1 && V_BIT_DEFINED == 0);
1246      /* (really need "UifU" here...)
1247         vbits64 UifU= pessim64  (is pessimised by it, iow) */
1248      vbits64 |= pessim64;
1249      return vbits64;
1250   }
1251
1252   /* Exemption doesn't apply.  Flag an addressing error in the normal
1253      way. */
1254   MC_(record_address_error)( VG_(get_running_tid)(), a, szB, False );
1255
1256   return vbits64;
1257}
1258
1259
1260static
1261__attribute__((noinline))
1262void mc_STOREVn_slow ( Addr a, SizeT nBits, ULong vbytes, Bool bigendian )
1263{
1264   SizeT szB = nBits / 8;
1265   SizeT i, n_addrs_bad = 0;
1266   UChar vbits8;
1267   Addr  ai;
1268   Bool  ok;
1269
1270   PROF_EVENT(35, "mc_STOREVn_slow");
1271
1272   /* ------------ BEGIN semi-fast cases ------------ */
1273   /* These deal quickly-ish with the common auxiliary primary map
1274      cases on 64-bit platforms.  Are merely a speedup hack; can be
1275      omitted without loss of correctness/functionality.  Note that in
1276      both cases the "sizeof(void*) == 8" causes these cases to be
1277      folded out by compilers on 32-bit platforms.  These are derived
1278      from STOREV64 and STOREV32.
1279   */
1280   if (LIKELY(sizeof(void*) == 8
1281                      && nBits == 64 && VG_IS_8_ALIGNED(a))) {
1282      SecMap* sm       = get_secmap_for_reading(a);
1283      UWord   sm_off16 = SM_OFF_16(a);
1284      UWord   vabits16 = ((UShort*)(sm->vabits8))[sm_off16];
1285      if (LIKELY( !is_distinguished_sm(sm) &&
1286                          (VA_BITS16_DEFINED   == vabits16 ||
1287                           VA_BITS16_UNDEFINED == vabits16) )) {
1288         /* Handle common case quickly: a is suitably aligned, */
1289         /* is mapped, and is addressible. */
1290         // Convert full V-bits in register to compact 2-bit form.
1291         if (LIKELY(V_BITS64_DEFINED == vbytes)) {
1292            ((UShort*)(sm->vabits8))[sm_off16] = (UShort)VA_BITS16_DEFINED;
1293            return;
1294         } else if (V_BITS64_UNDEFINED == vbytes) {
1295            ((UShort*)(sm->vabits8))[sm_off16] = (UShort)VA_BITS16_UNDEFINED;
1296            return;
1297         }
1298         /* else fall into the slow case */
1299      }
1300      /* else fall into the slow case */
1301   }
1302   if (LIKELY(sizeof(void*) == 8
1303                      && nBits == 32 && VG_IS_4_ALIGNED(a))) {
1304      SecMap* sm      = get_secmap_for_reading(a);
1305      UWord   sm_off  = SM_OFF(a);
1306      UWord   vabits8 = sm->vabits8[sm_off];
1307      if (LIKELY( !is_distinguished_sm(sm) &&
1308                          (VA_BITS8_DEFINED   == vabits8 ||
1309                           VA_BITS8_UNDEFINED == vabits8) )) {
1310         /* Handle common case quickly: a is suitably aligned, */
1311         /* is mapped, and is addressible. */
1312         // Convert full V-bits in register to compact 2-bit form.
1313         if (LIKELY(V_BITS32_DEFINED == (vbytes & 0xFFFFFFFF))) {
1314            sm->vabits8[sm_off] = VA_BITS8_DEFINED;
1315            return;
1316         } else if (V_BITS32_UNDEFINED == (vbytes & 0xFFFFFFFF)) {
1317            sm->vabits8[sm_off] = VA_BITS8_UNDEFINED;
1318            return;
1319         }
1320         /* else fall into the slow case */
1321      }
1322      /* else fall into the slow case */
1323   }
1324   /* ------------ END semi-fast cases ------------ */
1325
1326   tl_assert(nBits == 64 || nBits == 32 || nBits == 16 || nBits == 8);
1327
1328   /* Dump vbytes in memory, iterating from least to most significant
1329      byte.  At the same time establish addressibility of the location. */
1330   for (i = 0; i < szB; i++) {
1331      PROF_EVENT(36, "mc_STOREVn_slow(loop)");
1332      ai     = a + byte_offset_w(szB, bigendian, i);
1333      vbits8 = vbytes & 0xff;
1334      ok     = set_vbits8(ai, vbits8);
1335      if (!ok) n_addrs_bad++;
1336      vbytes >>= 8;
1337   }
1338
1339   /* If an address error has happened, report it. */
1340   if (n_addrs_bad > 0)
1341      MC_(record_address_error)( VG_(get_running_tid)(), a, szB, True );
1342}
1343
1344
1345/*------------------------------------------------------------*/
1346/*--- Setting permissions over address ranges.             ---*/
1347/*------------------------------------------------------------*/
1348
1349static void set_address_range_perms ( Addr a, SizeT lenT, UWord vabits16,
1350                                      UWord dsm_num )
1351{
1352   UWord    sm_off, sm_off16;
1353   UWord    vabits2 = vabits16 & 0x3;
1354   SizeT    lenA, lenB, len_to_next_secmap;
1355   Addr     aNext;
1356   SecMap*  sm;
1357   SecMap** sm_ptr;
1358   SecMap*  example_dsm;
1359
1360   PROF_EVENT(150, "set_address_range_perms");
1361
1362   /* Check the V+A bits make sense. */
1363   tl_assert(VA_BITS16_NOACCESS  == vabits16 ||
1364             VA_BITS16_UNDEFINED == vabits16 ||
1365             VA_BITS16_DEFINED   == vabits16);
1366
1367   // This code should never write PDBs;  ensure this.  (See comment above
1368   // set_vabits2().)
1369   tl_assert(VA_BITS2_PARTDEFINED != vabits2);
1370
1371   if (lenT == 0)
1372      return;
1373
1374   if (lenT > 256 * 1024 * 1024) {
1375      if (VG_(clo_verbosity) > 0 && !VG_(clo_xml)) {
1376         Char* s = "unknown???";
1377         if (vabits16 == VA_BITS16_NOACCESS ) s = "noaccess";
1378         if (vabits16 == VA_BITS16_UNDEFINED) s = "undefined";
1379         if (vabits16 == VA_BITS16_DEFINED  ) s = "defined";
1380         VG_(message)(Vg_UserMsg, "Warning: set address range perms: "
1381                                  "large range [0x%lx, 0x%lx) (%s)\n",
1382                                  a, a + lenT, s);
1383      }
1384   }
1385
1386#ifndef PERF_FAST_SARP
1387   /*------------------ debug-only case ------------------ */
1388   {
1389      // Endianness doesn't matter here because all bytes are being set to
1390      // the same value.
1391      // Nb: We don't have to worry about updating the sec-V-bits table
1392      // after these set_vabits2() calls because this code never writes
1393      // VA_BITS2_PARTDEFINED values.
1394      SizeT i;
1395      for (i = 0; i < lenT; i++) {
1396         set_vabits2(a + i, vabits2);
1397      }
1398      return;
1399   }
1400#endif
1401
1402   /*------------------ standard handling ------------------ */
1403
1404   /* Get the distinguished secondary that we might want
1405      to use (part of the space-compression scheme). */
1406   example_dsm = &sm_distinguished[dsm_num];
1407
1408   // We have to handle ranges covering various combinations of partial and
1409   // whole sec-maps.  Here is how parts 1, 2 and 3 are used in each case.
1410   // Cases marked with a '*' are common.
1411   //
1412   //   TYPE                                             PARTS USED
1413   //   ----                                             ----------
1414   // * one partial sec-map                  (p)         1
1415   // - one whole sec-map                    (P)         2
1416   //
1417   // * two partial sec-maps                 (pp)        1,3
1418   // - one partial, one whole sec-map       (pP)        1,2
1419   // - one whole, one partial sec-map       (Pp)        2,3
1420   // - two whole sec-maps                   (PP)        2,2
1421   //
1422   // * one partial, one whole, one partial  (pPp)       1,2,3
1423   // - one partial, two whole               (pPP)       1,2,2
1424   // - two whole, one partial               (PPp)       2,2,3
1425   // - three whole                          (PPP)       2,2,2
1426   //
1427   // * one partial, N-2 whole, one partial  (pP...Pp)   1,2...2,3
1428   // - one partial, N-1 whole               (pP...PP)   1,2...2,2
1429   // - N-1 whole, one partial               (PP...Pp)   2,2...2,3
1430   // - N whole                              (PP...PP)   2,2...2,3
1431
1432   // Break up total length (lenT) into two parts:  length in the first
1433   // sec-map (lenA), and the rest (lenB);   lenT == lenA + lenB.
1434   aNext = start_of_this_sm(a) + SM_SIZE;
1435   len_to_next_secmap = aNext - a;
1436   if ( lenT <= len_to_next_secmap ) {
1437      // Range entirely within one sec-map.  Covers almost all cases.
1438      PROF_EVENT(151, "set_address_range_perms-single-secmap");
1439      lenA = lenT;
1440      lenB = 0;
1441   } else if (is_start_of_sm(a)) {
1442      // Range spans at least one whole sec-map, and starts at the beginning
1443      // of a sec-map; skip to Part 2.
1444      PROF_EVENT(152, "set_address_range_perms-startof-secmap");
1445      lenA = 0;
1446      lenB = lenT;
1447      goto part2;
1448   } else {
1449      // Range spans two or more sec-maps, first one is partial.
1450      PROF_EVENT(153, "set_address_range_perms-multiple-secmaps");
1451      lenA = len_to_next_secmap;
1452      lenB = lenT - lenA;
1453   }
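
   // A worked example of the split (assuming the usual 64KB sec-maps, as
   // per the "64KB steps" comment in Part 2 below): with a = 0x1FFF0 and
   // lenT = 0x20020, aNext is 0x20000, so lenA = 0x10 (the tail of the
   // first sec-map) and lenB = 0x20010 (two whole sec-maps handled by
   // Part 2, plus 0x10 bytes handled by Part 3).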
1454
1455   //------------------------------------------------------------------------
1456   // Part 1: Deal with the first sec_map.  Most of the time the range will be
1457   // entirely within a sec_map and this part alone will suffice.  Also,
1458   // doing it this way lets us avoid repeatedly testing for the crossing of
1459   // a sec-map boundary within these loops.
1460   //------------------------------------------------------------------------
1461
1462   // If it's distinguished, make it undistinguished if necessary.
1463   sm_ptr = get_secmap_ptr(a);
1464   if (is_distinguished_sm(*sm_ptr)) {
1465      if (*sm_ptr == example_dsm) {
1466         // Sec-map already has the V+A bits that we want, so skip.
1467         PROF_EVENT(154, "set_address_range_perms-dist-sm1-quick");
1468         a    = aNext;
1469         lenA = 0;
1470      } else {
1471         PROF_EVENT(155, "set_address_range_perms-dist-sm1");
1472         *sm_ptr = copy_for_writing(*sm_ptr);
1473      }
1474   }
1475   sm = *sm_ptr;
1476
1477   // 1 byte steps
1478   while (True) {
1479      if (VG_IS_8_ALIGNED(a)) break;
1480      if (lenA < 1)           break;
1481      PROF_EVENT(156, "set_address_range_perms-loop1a");
1482      sm_off = SM_OFF(a);
1483      insert_vabits2_into_vabits8( a, vabits2, &(sm->vabits8[sm_off]) );
1484      a    += 1;
1485      lenA -= 1;
1486   }
1487   // 8-aligned, 8 byte steps
1488   while (True) {
1489      if (lenA < 8) break;
1490      PROF_EVENT(157, "set_address_range_perms-loop8a");
1491      sm_off16 = SM_OFF_16(a);
1492      ((UShort*)(sm->vabits8))[sm_off16] = vabits16;
1493      a    += 8;
1494      lenA -= 8;
1495   }
1496   // 1 byte steps
1497   while (True) {
1498      if (lenA < 1) break;
1499      PROF_EVENT(158, "set_address_range_perms-loop1b");
1500      sm_off = SM_OFF(a);
1501      insert_vabits2_into_vabits8( a, vabits2, &(sm->vabits8[sm_off]) );
1502      a    += 1;
1503      lenA -= 1;
1504   }
1505
1506   // We've finished the first sec-map.  Is that it?
1507   if (lenB == 0)
1508      return;
1509
1510   //------------------------------------------------------------------------
1511   // Part 2: Fast-set entire sec-maps at a time.
1512   //------------------------------------------------------------------------
1513  part2:
1514   // 64KB-aligned, 64KB steps.
1515   // Nb: we can reach here with lenB < SM_SIZE
1516   tl_assert(0 == lenA);
1517   while (True) {
1518      if (lenB < SM_SIZE) break;
1519      tl_assert(is_start_of_sm(a));
1520      PROF_EVENT(159, "set_address_range_perms-loop64K");
1521      sm_ptr = get_secmap_ptr(a);
1522      if (!is_distinguished_sm(*sm_ptr)) {
1523         PROF_EVENT(160, "set_address_range_perms-loop64K-free-dist-sm");
1524         // Free the non-distinguished sec-map that we're replacing.  This
1525         // case happens moderately often, enough to be worthwhile.
1526         SysRes sres = VG_(am_munmap_valgrind)((Addr)*sm_ptr, sizeof(SecMap));
1527         tl_assert2(! sr_isError(sres), "SecMap valgrind munmap failure\n");
1528      }
1529      update_SM_counts(*sm_ptr, example_dsm);
1530      // Make the sec-map entry point to the example DSM
1531      *sm_ptr = example_dsm;
1532      lenB -= SM_SIZE;
1533      a    += SM_SIZE;
1534   }
1535
1536   // We've finished all the whole sec-maps.  Is that it?
1537   if (lenB == 0)
1538      return;
1539
1540   //------------------------------------------------------------------------
1541   // Part 3: Finish off the final partial sec-map, if necessary.
1542   //------------------------------------------------------------------------
1543
1544   tl_assert(is_start_of_sm(a) && lenB < SM_SIZE);
1545
1546   // If it's distinguished, make it undistinguished if necessary.
1547   sm_ptr = get_secmap_ptr(a);
1548   if (is_distinguished_sm(*sm_ptr)) {
1549      if (*sm_ptr == example_dsm) {
1550         // Sec-map already has the V+A bits that we want, so stop.
1551         PROF_EVENT(161, "set_address_range_perms-dist-sm2-quick");
1552         return;
1553      } else {
1554         PROF_EVENT(162, "set_address_range_perms-dist-sm2");
1555         *sm_ptr = copy_for_writing(*sm_ptr);
1556      }
1557   }
1558   sm = *sm_ptr;
1559
1560   // 8-aligned, 8 byte steps
1561   while (True) {
1562      if (lenB < 8) break;
1563      PROF_EVENT(163, "set_address_range_perms-loop8b");
1564      sm_off16 = SM_OFF_16(a);
1565      ((UShort*)(sm->vabits8))[sm_off16] = vabits16;
1566      a    += 8;
1567      lenB -= 8;
1568   }
1569   // 1 byte steps
1570   while (True) {
1571      if (lenB < 1) return;
1572      PROF_EVENT(164, "set_address_range_perms-loop1c");
1573      sm_off = SM_OFF(a);
1574      insert_vabits2_into_vabits8( a, vabits2, &(sm->vabits8[sm_off]) );
1575      a    += 1;
1576      lenB -= 1;
1577   }
1578}
1579
1580
1581/* --- Set permissions for arbitrary address ranges --- */
1582
1583void MC_(make_mem_noaccess) ( Addr a, SizeT len )
1584{
1585   PROF_EVENT(40, "MC_(make_mem_noaccess)");
1586   DEBUG("MC_(make_mem_noaccess)(%p, %lu)\n", a, len);
1587   set_address_range_perms ( a, len, VA_BITS16_NOACCESS, SM_DIST_NOACCESS );
1588   if (UNLIKELY( MC_(clo_mc_level) == 3 ))
1589      ocache_sarp_Clear_Origins ( a, len );
1590}
1591
1592static void make_mem_undefined ( Addr a, SizeT len )
1593{
1594   PROF_EVENT(41, "make_mem_undefined");
1595   DEBUG("make_mem_undefined(%p, %lu)\n", a, len);
1596   set_address_range_perms ( a, len, VA_BITS16_UNDEFINED, SM_DIST_UNDEFINED );
1597}
1598
1599void MC_(make_mem_undefined_w_otag) ( Addr a, SizeT len, UInt otag )
1600{
1601   PROF_EVENT(41, "MC_(make_mem_undefined)");
1602   DEBUG("MC_(make_mem_undefined)(%p, %lu)\n", a, len);
1603   set_address_range_perms ( a, len, VA_BITS16_UNDEFINED, SM_DIST_UNDEFINED );
1604   if (UNLIKELY( MC_(clo_mc_level) == 3 ))
1605      ocache_sarp_Set_Origins ( a, len, otag );
1606}
1607
1608static
1609void make_mem_undefined_w_tid_and_okind ( Addr a, SizeT len,
1610                                          ThreadId tid, UInt okind )
1611{
1612   UInt        ecu;
1613   ExeContext* here;
1614   /* VG_(record_ExeContext) checks for validity of tid, and asserts
1615      if it is invalid.  So no need to do it here. */
1616   tl_assert(okind <= 3);
1617   here = VG_(record_ExeContext)( tid, 0/*first_ip_delta*/ );
1618   tl_assert(here);
1619   ecu = VG_(get_ECU_from_ExeContext)(here);
1620   tl_assert(VG_(is_plausible_ECU)(ecu));
1621   MC_(make_mem_undefined_w_otag) ( a, len, ecu | okind );
1622}
1623
1624static
1625void make_mem_undefined_w_tid ( Addr a, SizeT len, ThreadId tid ) {
1626   make_mem_undefined_w_tid_and_okind ( a, len, tid, MC_OKIND_UNKNOWN );
1627}
1628
1629
1630void MC_(make_mem_defined) ( Addr a, SizeT len )
1631{
1632   PROF_EVENT(42, "MC_(make_mem_defined)");
1633   DEBUG("MC_(make_mem_defined)(%p, %lu)\n", a, len);
1634   set_address_range_perms ( a, len, VA_BITS16_DEFINED, SM_DIST_DEFINED );
1635   if (UNLIKELY( MC_(clo_mc_level) == 3 ))
1636      ocache_sarp_Clear_Origins ( a, len );
1637}
1638
1639/* For each byte in [a,a+len), if the byte is addressable, make it be
1640   defined, but if it isn't addressable, leave it alone.  In other
1641   words, a version of MC_(make_mem_defined) that doesn't mess with
1642   addressability.  Low-performance implementation. */
1643static void make_mem_defined_if_addressable ( Addr a, SizeT len )
1644{
1645   SizeT i;
1646   UChar vabits2;
1647   DEBUG("make_mem_defined_if_addressable(%p, %llu)\n", a, (ULong)len);
1648   for (i = 0; i < len; i++) {
1649      vabits2 = get_vabits2( a+i );
1650      if (LIKELY(VA_BITS2_NOACCESS != vabits2)) {
1651         set_vabits2(a+i, VA_BITS2_DEFINED);
1652         if (UNLIKELY(MC_(clo_mc_level) >= 3)) {
1653            MC_(helperc_b_store1)( a+i, 0 ); /* clear the origin tag */
1654         }
1655      }
1656   }
1657}
1658
1659/* Similarly, but only for bytes that are currently noaccess (needed for mprotect handling). */
1660static void make_mem_defined_if_noaccess ( Addr a, SizeT len )
1661{
1662   SizeT i;
1663   UChar vabits2;
1664   DEBUG("make_mem_defined_if_noaccess(%p, %llu)\n", a, (ULong)len);
1665   for (i = 0; i < len; i++) {
1666      vabits2 = get_vabits2( a+i );
1667      if (LIKELY(VA_BITS2_NOACCESS == vabits2)) {
1668         set_vabits2(a+i, VA_BITS2_DEFINED);
1669         if (UNLIKELY(MC_(clo_mc_level) >= 3)) {
1670            MC_(helperc_b_store1)( a+i, 0 ); /* clear the origin tag */
1671         }
1672      }
1673   }
1674}
1675
1676/* --- Block-copy permissions (needed for implementing realloc() and
1677       sys_mremap). --- */
1678
1679void MC_(copy_address_range_state) ( Addr src, Addr dst, SizeT len )
1680{
1681   SizeT i, j;
1682   UChar vabits2, vabits8;
1683   Bool  aligned, nooverlap;
1684
1685   DEBUG("MC_(copy_address_range_state)\n");
1686   PROF_EVENT(50, "MC_(copy_address_range_state)");
1687
1688   if (len == 0 || src == dst)
1689      return;
1690
1691   aligned   = VG_IS_4_ALIGNED(src) && VG_IS_4_ALIGNED(dst);
1692   nooverlap = src+len <= dst || dst+len <= src;
1693
1694   if (nooverlap && aligned) {
1695
1696      /* Vectorised fast case, when no overlap and suitably aligned */
1697      /* vector loop */
1698      i = 0;
1699      while (len >= 4) {
1700         vabits8 = get_vabits8_for_aligned_word32( src+i );
1701         set_vabits8_for_aligned_word32( dst+i, vabits8 );
1702         if (LIKELY(VA_BITS8_DEFINED == vabits8
1703                            || VA_BITS8_UNDEFINED == vabits8
1704                            || VA_BITS8_NOACCESS == vabits8)) {
1705            /* do nothing */
1706         } else {
1707            /* have to copy secondary map info */
1708            if (VA_BITS2_PARTDEFINED == get_vabits2( src+i+0 ))
1709               set_sec_vbits8( dst+i+0, get_sec_vbits8( src+i+0 ) );
1710            if (VA_BITS2_PARTDEFINED == get_vabits2( src+i+1 ))
1711               set_sec_vbits8( dst+i+1, get_sec_vbits8( src+i+1 ) );
1712            if (VA_BITS2_PARTDEFINED == get_vabits2( src+i+2 ))
1713               set_sec_vbits8( dst+i+2, get_sec_vbits8( src+i+2 ) );
1714            if (VA_BITS2_PARTDEFINED == get_vabits2( src+i+3 ))
1715               set_sec_vbits8( dst+i+3, get_sec_vbits8( src+i+3 ) );
1716         }
1717         i += 4;
1718         len -= 4;
1719      }
1720      /* fixup loop */
1721      while (len >= 1) {
1722         vabits2 = get_vabits2( src+i );
1723         set_vabits2( dst+i, vabits2 );
1724         if (VA_BITS2_PARTDEFINED == vabits2) {
1725            set_sec_vbits8( dst+i, get_sec_vbits8( src+i ) );
1726         }
1727         i++;
1728         len--;
1729      }
1730
1731   } else {
1732
1733      /* We have to do things the slow way */
1734      if (src < dst) {
1735         for (i = 0, j = len-1; i < len; i++, j--) {
1736            PROF_EVENT(51, "MC_(copy_address_range_state)(loop)");
1737            vabits2 = get_vabits2( src+j );
1738            set_vabits2( dst+j, vabits2 );
1739            if (VA_BITS2_PARTDEFINED == vabits2) {
1740               set_sec_vbits8( dst+j, get_sec_vbits8( src+j ) );
1741            }
1742         }
1743      }
1744
1745      if (src > dst) {
1746         for (i = 0; i < len; i++) {
1747            PROF_EVENT(52, "MC_(copy_address_range_state)(loop)");
1748            vabits2 = get_vabits2( src+i );
1749            set_vabits2( dst+i, vabits2 );
1750            if (VA_BITS2_PARTDEFINED == vabits2) {
1751               set_sec_vbits8( dst+i, get_sec_vbits8( src+i ) );
1752            }
1753         }
1754      }
1755   }
1756
1757}
1758
1759
1760/*------------------------------------------------------------*/
1761/*--- Origin tracking stuff - cache basics                 ---*/
1762/*------------------------------------------------------------*/
1763
1764/* AN OVERVIEW OF THE ORIGIN TRACKING IMPLEMENTATION
1765   ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1766
1767   Note that this implementation draws inspiration from the "origin
1768   tracking by value piggybacking" scheme described in "Tracking Bad
1769   Apples: Reporting the Origin of Null and Undefined Value Errors"
1770   (Michael Bond, Nicholas Nethercote, Stephen Kent, Samuel Guyer,
1771   Kathryn McKinley, OOPSLA07, Montreal, Oct 2007) but in fact it is
1772   implemented completely differently.
1773
1774   Origin tags and ECUs -- about the shadow values
1775   ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1776
1777   This implementation tracks the defining point of all uninitialised
1778   values using so called "origin tags", which are 32-bit integers,
1779   rather than using the values themselves to encode the origins.  The
1780   latter, so-called "value piggybacking", is what the OOPSLA07 paper
1781   describes.
1782
1783   Origin tags, as tracked by the machinery below, are 32-bit unsigned
1784   ints (UInts), regardless of the machine's word size.  Each tag
1785   comprises an upper 30-bit ECU field and a lower 2-bit
1786   'kind' field.  The ECU field is a number given out by m_execontext
1787   and has a 1-1 mapping with ExeContext*s.  An ECU can be used
1788   directly as an origin tag (otag), but in fact we also want to put
1789   additional information in the 'kind' field, to indicate roughly where
1790   the tag came from.  This helps print more understandable error messages
1791   for the user -- it has no other purpose.  In summary:
1792
1793   * Both ECUs and origin tags are represented as 32-bit words
1794
1795   * m_execontext and the core-tool interface deal purely in ECUs.
1796     They have no knowledge of origin tags - that is a purely
1797     Memcheck-internal matter.
1798
1799   * all valid ECUs have the lowest 2 bits zero and at least
1800     one of the upper 30 bits nonzero (see VG_(is_plausible_ECU))
1801
1802   * to convert from an ECU to an otag, OR in one of the MC_OKIND_
1803     constants defined in mc_include.h.
1804
1805   * to convert an otag back to an ECU, AND it with ~3
1806
1807   One important fact is that no valid otag is zero.  A zero otag is
1808   used by the implementation to indicate "no origin", which could
1809   mean that either the value is defined, or it is undefined but the
1810   implementation somehow managed to lose the origin.
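
   For instance (a minimal sketch; MC_OKIND_STACK is one of the
   MC_OKIND_ constants from mc_include.h, and 'ecu' is assumed to be a
   valid ECU):

      UInt otag = ecu | MC_OKIND_STACK;   // ECU -> otag (stack origin)
      UInt ecu2 = otag & ~3;              // otag -> ECU;  ecu2 == ecu
      // and, since valid ECUs are nonzero, otag is also nonzero here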
1811
1812   The ECU used for memory created by malloc etc is derived from the
1813   stack trace at the time the malloc etc happens.  This means the
1814   mechanism can show the exact allocation point for heap-created
1815   uninitialised values.
1816
1817   In contrast, it is simply too expensive to create a complete
1818   backtrace for each stack allocation.  Therefore we merely use a
1819   depth-1 backtrace for stack allocations, which can be done once at
1820   translation time, rather than N times at run time.  The result of
1821   this is that, for stack created uninitialised values, Memcheck can
1822   only show the allocating function, and not what called it.
1823   Furthermore, compilers tend to move the stack pointer just once at
1824   the start of the function, to allocate all locals, and so in fact
1825   the stack origin almost always simply points to the opening brace
1826   of the function.  Net result is, for stack origins, the mechanism
1827   can tell you in which function the undefined value was created, but
1828   that's all.  Users will need to carefully check all locals in the
1829   specified function.
1830
1831   Shadowing registers and memory
1832   ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1833
1834   Memory is shadowed using a two level cache structure (ocacheL1 and
1835   ocacheL2).  Memory references are first directed to ocacheL1.  This
1836   is a traditional 2-way set associative cache with 32-byte lines and
1837   approximate LRU replacement within each set.
1838
1839   A naive implementation would require storing one 32 bit otag for
1840   each byte of memory covered, a 4:1 space overhead.  Instead, there
1841   is one otag for every 4 bytes of memory covered, plus a 4-bit mask
1842   that shows which of the 4 bytes have that shadow value and which
1843   have a shadow value of zero (indicating no origin).  Hence a lot of
1844   space is saved, but the cost is that only one different origin per
1845   4 bytes of address space can be represented.  This is a source of
1846   imprecision, but how much of a problem it really is remains to be
1847   seen.
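
   To make the encoding concrete (a sketch only; the real fields are
   OCacheLine.w32[] and OCacheLine.descr[], defined further below): each
   aligned 32-bit word of address space gets one w32 slot holding the
   shared otag, plus a 4-bit descr entry with one bit per byte saying
   whether that byte's origin is the shared otag (bit set) or zero,
   i.e. no origin (bit clear).  For example, descr == 0xF with w32 ==
   otag is what make_aligned_word32_undefined_w_otag (below) writes for
   a fully undefined word, and descr == 0 says the whole word carries
   no origin.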
1848
1849   A cache line that contains all zeroes ("no origins") contains no
1850   useful information, and can be ejected from the L1 cache "for
1851   free", in the sense that a read miss on the L1 causes a line of
1852   zeroes to be installed.  However, ejecting a line containing
1853   nonzeroes risks losing origin information permanently.  In order to
1854   prevent such lossage, ejected nonzero lines are placed in a
1855   secondary cache (ocacheL2), which is an OSet (AVL tree) of cache
1856   lines.  This can grow arbitrarily large, and so should ensure that
1857   Memcheck runs out of memory in preference to losing useful origin
1858   info due to cache size limitations.
1859
1860   Shadowing registers is a bit tricky, because the shadow values are
1861   32 bits, regardless of the size of the register.  That gives a
1862   problem for registers smaller than 32 bits.  The solution is to
1863   find spaces in the guest state that are unused, and use those to
1864   shadow guest state fragments smaller than 32 bits.  For example, on
1865   ppc32/64, each vector register is 16 bytes long.  If 4 bytes of the
1866   shadow are allocated for the register's otag, then there are still
1867   12 bytes left over which could be used to shadow 3 other values.
1868
1869   This implies there is some non-obvious mapping from guest state
1870   (start,length) pairs to the relevant shadow offset (for the origin
1871   tags).  And it is unfortunately guest-architecture specific.  The
1872   mapping is contained in mc_machine.c, which is quite lengthy but
1873   straightforward.
1874
1875   Instrumenting the IR
1876   ~~~~~~~~~~~~~~~~~~~~
1877
1878   Instrumentation is largely straightforward, and done by the
1879   functions schemeE and schemeS in mc_translate.c.  These generate
1880   code for handling the origin tags of expressions (E) and statements
1881   (S) respectively.  The rather strange names are a reference to the
1882   "compilation schemes" shown in Simon Peyton Jones' book "The
1883   Implementation of Functional Programming Languages" (Prentice Hall,
1884   1987, see
1885   http://research.microsoft.com/~simonpj/papers/slpj-book-1987/index.htm).
1886
1887   schemeS merely arranges to move shadow values around the guest
1888   state to track the incoming IR.  schemeE is largely trivial too.
1889   The only significant point is how to compute the otag corresponding
1890   to binary (or ternary, quaternary, etc) operator applications.  The
1891   rule is simple: just take whichever value is larger (32-bit
1892   unsigned max).  Constants get the special value zero.  Hence this
1893   rule always propagates a nonzero (known) otag in preference to a
1894   zero (unknown, or more likely, value-is-defined) tag, as we want.
1895   If two different undefined values are inputs to a binary operator
1896   application, then which is propagated is arbitrary, but that
1897   doesn't matter, since the program is erroneous in using either of
1898   the values, and so there's no point in attempting to propagate
1899   both.
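
   As a minimal sketch of that rule (illustrative C only; the real
   computation is emitted as IR by schemeE in mc_translate.c):

      // otag of op(x,y): unsigned max of the operands' otags.  A
      // constant operand contributes otag 0 and therefore never wins.
      UInt otag_result = otag_x > otag_y ? otag_x : otag_y;   // Max32U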
1900
1901   Since constants are abstracted to (otag) zero, much of the
1902   instrumentation code can be folded out without difficulty by the
1903   generic post-instrumentation IR cleanup pass, using these rules:
1904   Max32U(0,x) -> x, Max32U(x,0) -> x, and Max32U(x,y) where x and y
1905   are constants is evaluated at JIT time; dead code resulting from
1906   these simplifications is then removed.  In practice this causes
1907   surprisingly few Max32Us to survive through to backend code generation.
1908
1909   Integration with the V-bits machinery
1910   ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1911
1912   This is again largely straightforward.  Mostly the otag and V bits
1913   stuff are independent.  The only point of interaction is when the V
1914   bits instrumenter creates a call to a helper function to report an
1915   uninitialised value error -- in that case it must first use schemeE
1916   to get hold of the origin tag expression for the value, and pass
1917   that to the helper too.
1918
1919   There is the usual stuff to do with setting address range
1920   permissions.  When memory is painted undefined, we must also know
1921   the origin tag to paint with, which involves some tedious plumbing,
1922   particularly to do with the fast case stack handlers.  When memory
1923   is painted defined or noaccess then the origin tags must be forced
1924   to zero.
1925
1926   One of the goals of the implementation was to ensure that the
1927   non-origin tracking mode isn't slowed down at all.  To do this,
1928   various functions to do with memory permissions setting (again,
1929   mostly pertaining to the stack) are duplicated for the with- and
1930   without-otag case.
1931
1932   Dealing with stack redzones, and the NIA cache
1933   ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1934
1935   This is one of the few non-obvious parts of the implementation.
1936
1937   Some ABIs (amd64-ELF, ppc64-ELF, ppc32/64-XCOFF) define a small
1938   reserved area below the stack pointer, that can be used as scratch
1939   space by compiler generated code for functions.  In the Memcheck
1940   sources this is referred to as the "stack redzone".  The important
1941   thing here is that such redzones are considered volatile across
1942   function calls and returns.  So Memcheck takes care to mark them as
1943   undefined for each call and return, on the afflicted platforms.
1944   Past experience shows this is essential in order to get reliable
1945   messages about uninitialised values that come from the stack.
1946
1947   So the question is, when we paint a redzone undefined, what origin
1948   tag should we use for it?  Consider a function f() calling g().  If
1949   we paint the redzone using an otag derived from the ExeContext of
1950   the CALL/BL instruction in f, then any errors in g causing it to
1951   use uninitialised values that happen to lie in the redzone, will be
1952   reported as having their origin in f.  Which is highly confusing.
1953
1954   The same applies for returns: if, on a return, we paint the redzone
1955   using an origin tag derived from the ExeContext of the RET/BLR
1956   instruction in g, then any later errors in f causing it to use
1957   uninitialised values in the redzone, will be reported as having
1958   their origin in g.  Which is just as confusing.
1959
1960   To do it right, in both cases we need to use an origin tag which
1961   pertains to the instruction which dynamically follows the CALL/BL
1962   or RET/BLR.  In short, one derived from the NIA - the "next
1963   instruction address".
1964
1965   To make this work, Memcheck's redzone-painting helper,
1966   MC_(helperc_MAKE_STACK_UNINIT), now takes a third argument, the
1967   NIA.  It converts the NIA to a 1-element ExeContext, and uses that
1968   ExeContext's ECU as the basis for the otag used to paint the
1969   redzone.  The expensive part of this is converting an NIA into an
1970   ECU, since this happens once for every call and every return.  So
1971   we use a simple 511-line, 2-way set associative cache
1972   (nia_to_ecu_cache) to cache the mappings, and that knocks most of
1973   the cost out.
1974
1975   Further background comments
1976   ~~~~~~~~~~~~~~~~~~~~~~~~~~~
1977
1978   > Question: why is otag a UInt?  Wouldn't a UWord be better?  Isn't
1979   > it really just the address of the relevant ExeContext?
1980
1981   Well, it's not the address, but a value which has a 1-1 mapping
1982   with ExeContexts, and is guaranteed not to be zero, since zero
1983   denotes (to memcheck) "unknown origin or defined value".  So these
1984   UInts are just numbers starting at 4 and incrementing by 4; each
1985   ExeContext is given a number when it is created.  (*** NOTE this
1986   confuses otags and ECUs; see comments above ***).
1987
1988   Making these otags 32-bit regardless of the machine's word size
1989   makes the 64-bit implementation easier (next para).  And it doesn't
1990   really limit us in any way, since for the tags to overflow would
1991   require that the program somehow caused 2^30-1 different
1992   ExeContexts to be created, in which case it is probably in deep
1993   trouble.  Not to mention V will have soaked up many tens of
1994   gigabytes of memory merely to store them all.
1995
1996   So having 64-bit origins doesn't really buy you anything, and has
1997   the following downsides:
1998
1999   Suppose that instead, an otag is a UWord.  This would mean that, on
2000   a 64-bit target,
2001
2002   1. It becomes hard to shadow any element of guest state which is
2003      smaller than 8 bytes.  To do so means you'd need to find some
2004      8-byte-sized hole in the guest state which you don't want to
2005      shadow, and use that instead to hold the otag.  On ppc64, the
2006      condition code register(s) are split into 20 UChar sized pieces,
2007      all of which need to be tracked (guest_XER_SO .. guest_CR7_0)
2008      and so that would entail finding 160 bytes somewhere else in the
2009      guest state.
2010
2011      Even on x86, I want to track origins for %AH .. %DH (bits 15:8
2012      of %EAX .. %EDX) that are separate from %AL .. %DL (bits 7:0 of
2013      same) and so I had to look for 4 untracked otag-sized areas in
2014      the guest state to make that possible.
2015
2016      The same problem exists of course when origin tags are only 32
2017      bits, but it's less extreme.
2018
2019   2. (More compelling) it doubles the size of the origin shadow
2020      memory.  Given that the shadow memory is organised as a fixed
2021      size cache, and that accuracy of tracking is limited by origins
2022      falling out the cache due to space conflicts, this isn't good.
2023
2024   > Another question: is the origin tracking perfect, or are there
2025   > cases where it fails to determine an origin?
2026
2027   It is imperfect for at least the following reasons, and
2028   probably more:
2029
2030   * Insufficient capacity in the origin cache.  When a line is
2031     evicted from the cache it is gone forever, and so subsequent
2032     queries for the line produce zero, indicating no origin
2033     information.  Interestingly, a line containing all zeroes can be
2034     evicted "free" from the cache, since it contains no useful
2035     information, so there is scope perhaps for some cleverer cache
2036     management schemes.  (*** NOTE, with the introduction of the
2037     second level origin tag cache, ocacheL2, this is no longer a
2038     problem. ***)
2039
2040   * The origin cache only stores one otag per 32-bits of address
2041     space, plus 4 bits indicating which of the 4 bytes has that tag
2042     and which are considered defined.  The result is that if two
2043     undefined bytes in the same word are stored in memory, the first
2044     stored byte's origin will be lost and replaced by the origin for
2045     the second byte.
2046
2047   * Nonzero origin tags for defined values.  Consider a binary
2048     operator application op(x,y).  Suppose y is undefined (and so has
2049     a valid nonzero origin tag), and x is defined, but erroneously
2050     has a nonzero origin tag (defined values should have tag zero).
2051     If the erroneous tag has a numeric value greater than y's tag,
2052     then the rule for propagating origin tags through binary
2053     operations, which is simply to take the unsigned max of the two
2054     tags, will erroneously propagate x's tag rather than y's.
2055
2056   * Some obscure uses of x86/amd64 byte registers can cause lossage
2057     or confusion of origins.  %AH .. %DH are treated as different
2058     from, and unrelated to, their parent registers, %EAX .. %EDX.
2059     So some weird sequences like
2060
2061        movb undefined-value, %AH
2062        movb defined-value, %AL
2063        .. use %AX or %EAX ..
2064
2065     will cause the origin attributed to %AH to be ignored, since %AL,
2066     %AX, %EAX are treated as the same register, and %AH as a
2067     completely separate one.
2068
2069   But having said all that, it actually seems to work fairly well in
2070   practice.
2071*/
2072
2073static UWord stats_ocacheL1_find           = 0;
2074static UWord stats_ocacheL1_found_at_1     = 0;
2075static UWord stats_ocacheL1_found_at_N     = 0;
2076static UWord stats_ocacheL1_misses         = 0;
2077static UWord stats_ocacheL1_lossage        = 0;
2078static UWord stats_ocacheL1_movefwds       = 0;
2079
2080static UWord stats__ocacheL2_refs          = 0;
2081static UWord stats__ocacheL2_misses        = 0;
2082static UWord stats__ocacheL2_n_nodes_max   = 0;
2083
2084/* Cache of 32-bit values, one every 32 bits of address space */
2085
2086#define OC_BITS_PER_LINE 5
2087#define OC_W32S_PER_LINE (1 << (OC_BITS_PER_LINE - 2))
2088
2089static INLINE UWord oc_line_offset ( Addr a ) {
2090   return (a >> 2) & (OC_W32S_PER_LINE - 1);
2091}
2092static INLINE Bool is_valid_oc_tag ( Addr tag ) {
2093   return 0 == (tag & ((1 << OC_BITS_PER_LINE) - 1));
2094}
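
/* Worked example, for the values of OC_BITS_PER_LINE and
   OC_W32S_PER_LINE given above: a line covers 32 bytes, i.e. 8 aligned
   32-bit words.  For a == 0x1003C, oc_line_offset(a) == 7 (the last
   word in the line), and is_valid_oc_tag holds only for 32-byte-aligned
   values such as the line's base address, 0x10020. */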
2095
2096#define OC_LINES_PER_SET 2
2097
2098#define OC_N_SET_BITS    20
2099#define OC_N_SETS        (1 << OC_N_SET_BITS)
2100
2101/* These settings give:
2102   64 bit host: ocache:  100,663,296 sizeB    67,108,864 useful
2103   32 bit host: ocache:   92,274,688 sizeB    67,108,864 useful
2104*/
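
/* A quick sanity check of those numbers, using the OCacheLine layout
   defined below (Addr is 8 bytes on a 64-bit host, 4 on a 32-bit host):
      useful payload = OC_N_SETS * OC_LINES_PER_SET * 4*OC_W32S_PER_LINE
                     = 2^20 * 2 * 32           =  67,108,864 bytes
      64-bit total   = 2^20 * 2 * (8 + 32 + 8) = 100,663,296 bytes
      32-bit total   = 2^20 * 2 * (4 + 32 + 8) =  92,274,688 bytes   */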
2105
2106#define OC_MOVE_FORWARDS_EVERY_BITS 7
2107
2108
2109typedef
2110   struct {
2111      Addr  tag;
2112      UInt  w32[OC_W32S_PER_LINE];
2113      UChar descr[OC_W32S_PER_LINE];
2114   }
2115   OCacheLine;
2116
2117/* Classify and also sanity-check 'line'.  Return 'e' (empty) if not
2118   in use, 'n' (nonzero) if it contains at least one valid origin tag,
2119   and 'z' if all the represented tags are zero. */
2120static UChar classify_OCacheLine ( OCacheLine* line )
2121{
2122   UWord i;
2123   if (line->tag == 1/*invalid*/)
2124      return 'e'; /* EMPTY */
2125   tl_assert(is_valid_oc_tag(line->tag));
2126   for (i = 0; i < OC_W32S_PER_LINE; i++) {
2127      tl_assert(0 == ((~0xF) & line->descr[i]));
2128      if (line->w32[i] > 0 && line->descr[i] > 0)
2129         return 'n'; /* NONZERO - contains useful info */
2130   }
2131   return 'z'; /* ZERO - no useful info */
2132}
2133
2134typedef
2135   struct {
2136      OCacheLine line[OC_LINES_PER_SET];
2137   }
2138   OCacheSet;
2139
2140typedef
2141   struct {
2142      OCacheSet set[OC_N_SETS];
2143   }
2144   OCache;
2145
2146static OCache* ocacheL1 = NULL;
2147static UWord   ocacheL1_event_ctr = 0;
2148
2149static void init_ocacheL2 ( void ); /* fwds */
2150static void init_OCache ( void )
2151{
2152   UWord line, set;
2153   tl_assert(MC_(clo_mc_level) >= 3);
2154   tl_assert(ocacheL1 == NULL);
2155   ocacheL1 = VG_(am_shadow_alloc)(sizeof(OCache));
2156   if (ocacheL1 == NULL) {
2157      VG_(out_of_memory_NORETURN)( "memcheck:allocating ocacheL1",
2158                                   sizeof(OCache) );
2159   }
2160   tl_assert(ocacheL1 != NULL);
2161   for (set = 0; set < OC_N_SETS; set++) {
2162      for (line = 0; line < OC_LINES_PER_SET; line++) {
2163         ocacheL1->set[set].line[line].tag = 1/*invalid*/;
2164      }
2165   }
2166   init_ocacheL2();
2167}
2168
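/* Nudge the line in slot 'lineno' one place towards the front of its
   set (slot 0 is the first one checked by find_OCacheLine).
   find_OCacheLine_SLOW calls this occasionally on hits, and once per
   miss refill, giving the approximate LRU replacement mentioned in the
   overview comment above. */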
2169static void moveLineForwards ( OCacheSet* set, UWord lineno )
2170{
2171   OCacheLine tmp;
2172   stats_ocacheL1_movefwds++;
2173   tl_assert(lineno > 0 && lineno < OC_LINES_PER_SET);
2174   tmp = set->line[lineno-1];
2175   set->line[lineno-1] = set->line[lineno];
2176   set->line[lineno] = tmp;
2177}
2178
2179static void zeroise_OCacheLine ( OCacheLine* line, Addr tag ) {
2180   UWord i;
2181   for (i = 0; i < OC_W32S_PER_LINE; i++) {
2182      line->w32[i] = 0; /* NO ORIGIN */
2183      line->descr[i] = 0; /* REALLY REALLY NO ORIGIN! */
2184   }
2185   line->tag = tag;
2186}
2187
2188//////////////////////////////////////////////////////////////
2189//// OCache backing store
2190
2191static OSet* ocacheL2 = NULL;
2192
2193static void* ocacheL2_malloc ( HChar* cc, SizeT szB ) {
2194   return VG_(malloc)(cc, szB);
2195}
2196static void ocacheL2_free ( void* v ) {
2197   VG_(free)( v );
2198}
2199
2200/* Stats: # nodes currently in tree */
2201static UWord stats__ocacheL2_n_nodes = 0;
2202
2203static void init_ocacheL2 ( void )
2204{
2205   tl_assert(!ocacheL2);
2206   tl_assert(sizeof(Word) == sizeof(Addr)); /* since OCacheLine.tag :: Addr */
2207   tl_assert(0 == offsetof(OCacheLine,tag));
2208   ocacheL2
2209      = VG_(OSetGen_Create)( offsetof(OCacheLine,tag),
2210                             NULL, /* fast cmp */
2211                             ocacheL2_malloc, "mc.ioL2", ocacheL2_free);
2212   tl_assert(ocacheL2);
2213   stats__ocacheL2_n_nodes = 0;
2214}
2215
2216/* Find line with the given tag in the tree, or NULL if not found. */
2217static OCacheLine* ocacheL2_find_tag ( Addr tag )
2218{
2219   OCacheLine* line;
2220   tl_assert(is_valid_oc_tag(tag));
2221   stats__ocacheL2_refs++;
2222   line = VG_(OSetGen_Lookup)( ocacheL2, &tag );
2223   return line;
2224}
2225
2226/* Delete the line with the given tag from the tree, if it is present, and
2227   free up the associated memory. */
2228static void ocacheL2_del_tag ( Addr tag )
2229{
2230   OCacheLine* line;
2231   tl_assert(is_valid_oc_tag(tag));
2232   stats__ocacheL2_refs++;
2233   line = VG_(OSetGen_Remove)( ocacheL2, &tag );
2234   if (line) {
2235      VG_(OSetGen_FreeNode)(ocacheL2, line);
2236      tl_assert(stats__ocacheL2_n_nodes > 0);
2237      stats__ocacheL2_n_nodes--;
2238   }
2239}
2240
2241/* Add a copy of the given line to the tree.  It must not already be
2242   present. */
2243static void ocacheL2_add_line ( OCacheLine* line )
2244{
2245   OCacheLine* copy;
2246   tl_assert(is_valid_oc_tag(line->tag));
2247   copy = VG_(OSetGen_AllocNode)( ocacheL2, sizeof(OCacheLine) );
2248   tl_assert(copy);
2249   *copy = *line;
2250   stats__ocacheL2_refs++;
2251   VG_(OSetGen_Insert)( ocacheL2, copy );
2252   stats__ocacheL2_n_nodes++;
2253   if (stats__ocacheL2_n_nodes > stats__ocacheL2_n_nodes_max)
2254      stats__ocacheL2_n_nodes_max = stats__ocacheL2_n_nodes;
2255}
2256
2257////
2258//////////////////////////////////////////////////////////////
2259
2260__attribute__((noinline))
2261static OCacheLine* find_OCacheLine_SLOW ( Addr a )
2262{
2263   OCacheLine *victim, *inL2;
2264   UChar c;
2265   UWord line;
2266   UWord setno   = (a >> OC_BITS_PER_LINE) & (OC_N_SETS - 1);
2267   UWord tagmask = ~((1 << OC_BITS_PER_LINE) - 1);
2268   UWord tag     = a & tagmask;
2269   tl_assert(setno >= 0 && setno < OC_N_SETS);
2270
2271   /* we already tried line == 0; skip therefore. */
2272   for (line = 1; line < OC_LINES_PER_SET; line++) {
2273      if (ocacheL1->set[setno].line[line].tag == tag) {
2274         if (line == 1) {
2275            stats_ocacheL1_found_at_1++;
2276         } else {
2277            stats_ocacheL1_found_at_N++;
2278         }
2279         if (UNLIKELY(0 == (ocacheL1_event_ctr++
2280                            & ((1<<OC_MOVE_FORWARDS_EVERY_BITS)-1)))) {
2281            moveLineForwards( &ocacheL1->set[setno], line );
2282            line--;
2283         }
2284         return &ocacheL1->set[setno].line[line];
2285      }
2286   }
2287
2288   /* A miss.  Use the last slot.  Implicitly this means we're
2289      ejecting the line in the last slot. */
2290   stats_ocacheL1_misses++;
2291   tl_assert(line == OC_LINES_PER_SET);
2292   line--;
2293   tl_assert(line > 0);
2294
2295   /* First, move the to-be-ejected line to the L2 cache. */
2296   victim = &ocacheL1->set[setno].line[line];
2297   c = classify_OCacheLine(victim);
2298   switch (c) {
2299      case 'e':
2300         /* the line is empty (has invalid tag); ignore it. */
2301         break;
2302      case 'z':
2303         /* line contains zeroes.  We must ensure the backing store is
2304            updated accordingly, either by copying the line there
2305            verbatim, or by ensuring it isn't present there.  We
2306            choose the latter on the basis that it reduces the size of
2307            the backing store. */
2308         ocacheL2_del_tag( victim->tag );
2309         break;
2310      case 'n':
2311         /* line contains at least one real, useful origin.  Copy it
2312            to the backing store. */
2313         stats_ocacheL1_lossage++;
2314         inL2 = ocacheL2_find_tag( victim->tag );
2315         if (inL2) {
2316            *inL2 = *victim;
2317         } else {
2318            ocacheL2_add_line( victim );
2319         }
2320         break;
2321      default:
2322         tl_assert(0);
2323   }
2324
2325   /* Now we must reload the L1 cache from the backing tree, if
2326      possible. */
2327   tl_assert(tag != victim->tag); /* stay sane */
2328   inL2 = ocacheL2_find_tag( tag );
2329   if (inL2) {
2330      /* We're in luck.  It's in the L2. */
2331      ocacheL1->set[setno].line[line] = *inL2;
2332   } else {
2333      /* Missed at both levels of the cache hierarchy.  We have to
2334         declare it as full of zeroes (unknown origins). */
2335      stats__ocacheL2_misses++;
2336      zeroise_OCacheLine( &ocacheL1->set[setno].line[line], tag );
2337   }
2338
2339   /* Move it one forwards */
2340   moveLineForwards( &ocacheL1->set[setno], line );
2341   line--;
2342
2343   return &ocacheL1->set[setno].line[line];
2344}
2345
2346static INLINE OCacheLine* find_OCacheLine ( Addr a )
2347{
2348   UWord setno   = (a >> OC_BITS_PER_LINE) & (OC_N_SETS - 1);
2349   UWord tagmask = ~((1 << OC_BITS_PER_LINE) - 1);
2350   UWord tag     = a & tagmask;
2351
2352   stats_ocacheL1_find++;
2353
2354   if (OC_ENABLE_ASSERTIONS) {
2355      tl_assert(setno >= 0 && setno < OC_N_SETS);
2356      tl_assert(0 == (tag & (4 * OC_W32S_PER_LINE - 1)));
2357   }
2358
2359   if (LIKELY(ocacheL1->set[setno].line[0].tag == tag)) {
2360      return &ocacheL1->set[setno].line[0];
2361   }
2362
2363   return find_OCacheLine_SLOW( a );
2364}
2365
2366static INLINE void set_aligned_word64_Origin_to_undef ( Addr a, UInt otag )
2367{
2368   //// BEGIN inlined, specialised version of MC_(helperc_b_store8)
2369   //// Set the origins for a+0 .. a+7
2370   { OCacheLine* line;
2371     UWord lineoff = oc_line_offset(a);
2372     if (OC_ENABLE_ASSERTIONS) {
2373        tl_assert(lineoff >= 0
2374                  && lineoff < OC_W32S_PER_LINE -1/*'cos 8-aligned*/);
2375     }
2376     line = find_OCacheLine( a );
2377     line->descr[lineoff+0] = 0xF;
2378     line->descr[lineoff+1] = 0xF;
2379     line->w32[lineoff+0]   = otag;
2380     line->w32[lineoff+1]   = otag;
2381   }
2382   //// END inlined, specialised version of MC_(helperc_b_store8)
2383}
2384
2385
2386/*------------------------------------------------------------*/
2387/*--- Aligned fast case permission setters,                ---*/
2388/*--- for dealing with stacks                              ---*/
2389/*------------------------------------------------------------*/
2390
2391/*--------------------- 32-bit ---------------------*/
2392
2393/* Nb: by "aligned" here we mean 4-byte aligned */
2394
2395static INLINE void make_aligned_word32_undefined ( Addr a )
2396{
2397   PROF_EVENT(300, "make_aligned_word32_undefined");
2398
2399#ifndef PERF_FAST_STACK2
2400   make_mem_undefined(a, 4);
2401#else
2402   {
2403      UWord   sm_off;
2404      SecMap* sm;
2405
2406      if (UNLIKELY(a > MAX_PRIMARY_ADDRESS)) {
2407         PROF_EVENT(301, "make_aligned_word32_undefined-slow1");
2408         make_mem_undefined(a, 4);
2409         return;
2410      }
2411
2412      sm                  = get_secmap_for_writing_low(a);
2413      sm_off              = SM_OFF(a);
2414      sm->vabits8[sm_off] = VA_BITS8_UNDEFINED;
2415   }
2416#endif
2417}
2418
2419static INLINE
2420void make_aligned_word32_undefined_w_otag ( Addr a, UInt otag )
2421{
2422   make_aligned_word32_undefined(a);
2423   //// BEGIN inlined, specialised version of MC_(helperc_b_store4)
2424   //// Set the origins for a+0 .. a+3
2425   { OCacheLine* line;
2426     UWord lineoff = oc_line_offset(a);
2427     if (OC_ENABLE_ASSERTIONS) {
2428        tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE);
2429     }
2430     line = find_OCacheLine( a );
2431     line->descr[lineoff] = 0xF;
2432     line->w32[lineoff]   = otag;
2433   }
2434   //// END inlined, specialised version of MC_(helperc_b_store4)
2435}
2436
2437static INLINE
2438void make_aligned_word32_noaccess ( Addr a )
2439{
2440   PROF_EVENT(310, "make_aligned_word32_noaccess");
2441
2442#ifndef PERF_FAST_STACK2
2443   MC_(make_mem_noaccess)(a, 4);
2444#else
2445   {
2446      UWord   sm_off;
2447      SecMap* sm;
2448
2449      if (UNLIKELY(a > MAX_PRIMARY_ADDRESS)) {
2450         PROF_EVENT(311, "make_aligned_word32_noaccess-slow1");
2451         MC_(make_mem_noaccess)(a, 4);
2452         return;
2453      }
2454
2455      sm                  = get_secmap_for_writing_low(a);
2456      sm_off              = SM_OFF(a);
2457      sm->vabits8[sm_off] = VA_BITS8_NOACCESS;
2458
2459      //// BEGIN inlined, specialised version of MC_(helperc_b_store4)
2460      //// Set the origins for a+0 .. a+3.
2461      if (UNLIKELY( MC_(clo_mc_level) == 3 )) {
2462         OCacheLine* line;
2463         UWord lineoff = oc_line_offset(a);
2464         if (OC_ENABLE_ASSERTIONS) {
2465            tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE);
2466         }
2467         line = find_OCacheLine( a );
2468         line->descr[lineoff] = 0;
2469      }
2470      //// END inlined, specialised version of MC_(helperc_b_store4)
2471   }
2472#endif
2473}
2474
2475/*--------------------- 64-bit ---------------------*/
2476
2477/* Nb: by "aligned" here we mean 8-byte aligned */
2478
2479static INLINE void make_aligned_word64_undefined ( Addr a )
2480{
2481   PROF_EVENT(320, "make_aligned_word64_undefined");
2482
2483#ifndef PERF_FAST_STACK2
2484   make_mem_undefined(a, 8);
2485#else
2486   {
2487      UWord   sm_off16;
2488      SecMap* sm;
2489
2490      if (UNLIKELY(a > MAX_PRIMARY_ADDRESS)) {
2491         PROF_EVENT(321, "make_aligned_word64_undefined-slow1");
2492         make_mem_undefined(a, 8);
2493         return;
2494      }
2495
2496      sm       = get_secmap_for_writing_low(a);
2497      sm_off16 = SM_OFF_16(a);
2498      ((UShort*)(sm->vabits8))[sm_off16] = VA_BITS16_UNDEFINED;
2499   }
2500#endif
2501}
2502
2503static INLINE
2504void make_aligned_word64_undefined_w_otag ( Addr a, UInt otag )
2505{
2506   make_aligned_word64_undefined(a);
2507   //// BEGIN inlined, specialised version of MC_(helperc_b_store8)
2508   //// Set the origins for a+0 .. a+7
2509   { OCacheLine* line;
2510     UWord lineoff = oc_line_offset(a);
2511     tl_assert(lineoff >= 0
2512               && lineoff < OC_W32S_PER_LINE -1/*'cos 8-aligned*/);
2513     line = find_OCacheLine( a );
2514     line->descr[lineoff+0] = 0xF;
2515     line->descr[lineoff+1] = 0xF;
2516     line->w32[lineoff+0]   = otag;
2517     line->w32[lineoff+1]   = otag;
2518   }
2519   //// END inlined, specialised version of MC_(helperc_b_store8)
2520}
2521
2522static INLINE
2523void make_aligned_word64_noaccess ( Addr a )
2524{
2525   PROF_EVENT(330, "make_aligned_word64_noaccess");
2526
2527#ifndef PERF_FAST_STACK2
2528   MC_(make_mem_noaccess)(a, 8);
2529#else
2530   {
2531      UWord   sm_off16;
2532      SecMap* sm;
2533
2534      if (UNLIKELY(a > MAX_PRIMARY_ADDRESS)) {
2535         PROF_EVENT(331, "make_aligned_word64_noaccess-slow1");
2536         MC_(make_mem_noaccess)(a, 8);
2537         return;
2538      }
2539
2540      sm       = get_secmap_for_writing_low(a);
2541      sm_off16 = SM_OFF_16(a);
2542      ((UShort*)(sm->vabits8))[sm_off16] = VA_BITS16_NOACCESS;
2543
2544      //// BEGIN inlined, specialised version of MC_(helperc_b_store8)
2545      //// Clear the origins for a+0 .. a+7.
2546      if (UNLIKELY( MC_(clo_mc_level) == 3 )) {
2547         OCacheLine* line;
2548         UWord lineoff = oc_line_offset(a);
2549         tl_assert(lineoff >= 0
2550                   && lineoff < OC_W32S_PER_LINE -1/*'cos 8-aligned*/);
2551         line = find_OCacheLine( a );
2552         line->descr[lineoff+0] = 0;
2553         line->descr[lineoff+1] = 0;
2554      }
2555      //// END inlined, specialised version of MC_(helperc_b_store8)
2556   }
2557#endif
2558}
2559
2560
2561/*------------------------------------------------------------*/
2562/*--- Stack pointer adjustment                             ---*/
2563/*------------------------------------------------------------*/
2564
2565#ifdef PERF_FAST_STACK
2566#  define MAYBE_USED
2567#else
2568#  define MAYBE_USED __attribute__((unused))
2569#endif
2570
2571/*--------------- adjustment by 4 bytes ---------------*/
2572
2573MAYBE_USED
2574static void VG_REGPARM(2) mc_new_mem_stack_4_w_ECU(Addr new_SP, UInt ecu)
2575{
2576   UInt otag = ecu | MC_OKIND_STACK;
2577   PROF_EVENT(110, "new_mem_stack_4");
2578   if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2579      make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP, otag );
2580   } else {
2581      MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 4, otag );
2582   }
2583}
2584
2585MAYBE_USED
2586static void VG_REGPARM(1) mc_new_mem_stack_4(Addr new_SP)
2587{
2588   PROF_EVENT(110, "new_mem_stack_4");
2589   if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2590      make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
2591   } else {
2592      make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 4 );
2593   }
2594}
2595
2596MAYBE_USED
2597static void VG_REGPARM(1) mc_die_mem_stack_4(Addr new_SP)
2598{
2599   PROF_EVENT(120, "die_mem_stack_4");
2600   if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2601      make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-4 );
2602   } else {
2603      MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-4, 4 );
2604   }
2605}
2606
2607/*--------------- adjustment by 8 bytes ---------------*/
2608
2609MAYBE_USED
2610static void VG_REGPARM(2) mc_new_mem_stack_8_w_ECU(Addr new_SP, UInt ecu)
2611{
2612   UInt otag = ecu | MC_OKIND_STACK;
2613   PROF_EVENT(111, "new_mem_stack_8");
2614   if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2615      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP, otag );
2616   } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2617      make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP  , otag );
2618      make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+4, otag );
2619   } else {
2620      MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 8, otag );
2621   }
2622}
2623
2624MAYBE_USED
2625static void VG_REGPARM(1) mc_new_mem_stack_8(Addr new_SP)
2626{
2627   PROF_EVENT(111, "new_mem_stack_8");
2628   if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2629      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
2630   } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2631      make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
2632      make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP+4 );
2633   } else {
2634      make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 8 );
2635   }
2636}
2637
2638MAYBE_USED
2639static void VG_REGPARM(1) mc_die_mem_stack_8(Addr new_SP)
2640{
2641   PROF_EVENT(121, "die_mem_stack_8");
2642   if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2643      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-8 );
2644   } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2645      make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-8 );
2646      make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-4 );
2647   } else {
2648      MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-8, 8 );
2649   }
2650}
2651
2652/*--------------- adjustment by 12 bytes ---------------*/
2653
2654MAYBE_USED
2655static void VG_REGPARM(2) mc_new_mem_stack_12_w_ECU(Addr new_SP, UInt ecu)
2656{
2657   UInt otag = ecu | MC_OKIND_STACK;
2658   PROF_EVENT(112, "new_mem_stack_12");
2659   if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2660      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP  , otag );
2661      make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+8, otag );
2662   } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2663      /* from previous test we don't have 8-alignment at offset +0,
2664         hence must have 8 alignment at offsets +4/-4.  Hence safe to
2665         do 4 at +0 and then 8 at +4. */
2666      make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP  , otag );
2667      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+4, otag );
2668   } else {
2669      MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 12, otag );
2670   }
2671}
2672
2673MAYBE_USED
2674static void VG_REGPARM(1) mc_new_mem_stack_12(Addr new_SP)
2675{
2676   PROF_EVENT(112, "new_mem_stack_12");
2677   if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2678      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
2679      make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP+8 );
2680   } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2681      /* from previous test we don't have 8-alignment at offset +0,
2682         hence must have 8 alignment at offsets +4/-4.  Hence safe to
2683         do 4 at +0 and then 8 at +4. */
2684      make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
2685      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+4 );
2686   } else {
2687      make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 12 );
2688   }
2689}
2690
2691MAYBE_USED
2692static void VG_REGPARM(1) mc_die_mem_stack_12(Addr new_SP)
2693{
2694   PROF_EVENT(122, "die_mem_stack_12");
2695   /* Note the -12 in the test */
2696   if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP-12 )) {
2697      /* We have 8-alignment at -12, hence ok to do 8 at -12 and 4 at
2698         -4. */
2699      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-12 );
2700      make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-4  );
2701   } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2702      /* We have 4-alignment at +0, but we don't have 8-alignment at
2703         -12.  So we must have 8-alignment at -8.  Hence do 4 at -12
2704         and then 8 at -8. */
2705      make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-12 );
2706      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-8  );
2707   } else {
2708      MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-12, 12 );
2709   }
2710}
2711
2712/*--------------- adjustment by 16 bytes ---------------*/
2713
2714MAYBE_USED
2715static void VG_REGPARM(2) mc_new_mem_stack_16_w_ECU(Addr new_SP, UInt ecu)
2716{
2717   UInt otag = ecu | MC_OKIND_STACK;
2718   PROF_EVENT(113, "new_mem_stack_16");
2719   if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2720      /* Have 8-alignment at +0, hence do 8 at +0 and 8 at +8. */
2721      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP  , otag );
2722      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+8, otag );
2723   } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2724      /* Have 4 alignment at +0 but not 8; hence 8 must be at +4.
2725         Hence do 4 at +0, 8 at +4, 4 at +12. */
2726      make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP   , otag );
2727      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+4 , otag );
2728      make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+12, otag );
2729   } else {
2730      MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 16, otag );
2731   }
2732}
2733
2734MAYBE_USED
2735static void VG_REGPARM(1) mc_new_mem_stack_16(Addr new_SP)
2736{
2737   PROF_EVENT(113, "new_mem_stack_16");
2738   if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2739      /* Have 8-alignment at +0, hence do 8 at +0 and 8 at +8. */
2740      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
2741      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+8 );
2742   } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2743      /* Have 4 alignment at +0 but not 8; hence 8 must be at +4.
2744         Hence do 4 at +0, 8 at +4, 4 at +12. */
2745      make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
2746      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+4  );
2747      make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP+12 );
2748   } else {
2749      make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 16 );
2750   }
2751}
2752
2753MAYBE_USED
2754static void VG_REGPARM(1) mc_die_mem_stack_16(Addr new_SP)
2755{
2756   PROF_EVENT(123, "die_mem_stack_16");
2757   if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2758      /* Have 8-alignment at +0, hence do 8 at -16 and 8 at -8. */
2759      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-16 );
2760      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-8  );
2761   } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2762      /* 8 alignment must be at -12.  Do 4 at -16, 8 at -12, 4 at -4. */
2763      make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-16 );
2764      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-12 );
2765      make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-4  );
2766   } else {
2767      MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-16, 16 );
2768   }
2769}
2770
2771/*--------------- adjustment by 32 bytes ---------------*/
2772
2773MAYBE_USED
2774static void VG_REGPARM(2) mc_new_mem_stack_32_w_ECU(Addr new_SP, UInt ecu)
2775{
2776   UInt otag = ecu | MC_OKIND_STACK;
2777   PROF_EVENT(114, "new_mem_stack_32");
2778   if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2779      /* Straightforward */
2780      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP   , otag );
2781      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+8 , otag );
2782      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+16, otag );
2783      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+24, otag );
2784   } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2785      /* 8 alignment must be at +4.  Hence do 8 at +4,+12,+20 and 4 at
2786         +0,+28. */
2787      make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP   , otag );
2788      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+4 , otag );
2789      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+12, otag );
2790      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+20, otag );
2791      make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+28, otag );
2792   } else {
2793      MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 32, otag );
2794   }
2795}
2796
2797MAYBE_USED
2798static void VG_REGPARM(1) mc_new_mem_stack_32(Addr new_SP)
2799{
2800   PROF_EVENT(114, "new_mem_stack_32");
2801   if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2802      /* Straightforward */
2803      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
2804      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+8 );
2805      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+16 );
2806      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+24 );
2807   } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2808      /* 8 alignment must be at +4.  Hence do 8 at +4,+12,+20 and 4 at
2809         +0,+28. */
2810      make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
2811      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+4 );
2812      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+12 );
2813      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+20 );
2814      make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP+28 );
2815   } else {
2816      make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 32 );
2817   }
2818}
2819
2820MAYBE_USED
2821static void VG_REGPARM(1) mc_die_mem_stack_32(Addr new_SP)
2822{
2823   PROF_EVENT(124, "die_mem_stack_32");
2824   if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2825      /* Straightforward */
2826      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-32 );
2827      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-24 );
2828      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-16 );
2829      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP- 8 );
2830   } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2831      /* 8 alignment must be at -4 etc.  Hence do 8 at -12,-20,-28 and
2832         4 at -32,-4. */
2833      make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-32 );
2834      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-28 );
2835      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-20 );
2836      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-12 );
2837      make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-4  );
2838   } else {
2839      MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-32, 32 );
2840   }
2841}
2842
2843/*--------------- adjustment by 112 bytes ---------------*/
2844
2845MAYBE_USED
2846static void VG_REGPARM(2) mc_new_mem_stack_112_w_ECU(Addr new_SP, UInt ecu)
2847{
2848   UInt otag = ecu | MC_OKIND_STACK;
2849   PROF_EVENT(115, "new_mem_stack_112");
2850   if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2851      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP   , otag );
2852      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+8 , otag );
2853      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+16, otag );
2854      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+24, otag );
2855      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+32, otag );
2856      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+40, otag );
2857      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+48, otag );
2858      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+56, otag );
2859      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+64, otag );
2860      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+72, otag );
2861      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+80, otag );
2862      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+88, otag );
2863      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+96, otag );
2864      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+104, otag );
2865   } else {
2866      MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 112, otag );
2867   }
2868}
2869
2870MAYBE_USED
2871static void VG_REGPARM(1) mc_new_mem_stack_112(Addr new_SP)
2872{
2873   PROF_EVENT(115, "new_mem_stack_112");
2874   if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2875      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
2876      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+8 );
2877      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+16 );
2878      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+24 );
2879      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+32 );
2880      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+40 );
2881      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+48 );
2882      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+56 );
2883      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+64 );
2884      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+72 );
2885      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+80 );
2886      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+88 );
2887      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+96 );
2888      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+104 );
2889   } else {
2890      make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 112 );
2891   }
2892}
2893
2894MAYBE_USED
2895static void VG_REGPARM(1) mc_die_mem_stack_112(Addr new_SP)
2896{
2897   PROF_EVENT(125, "die_mem_stack_112");
2898   if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2899      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-112);
2900      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-104);
2901      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-96 );
2902      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-88 );
2903      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-80 );
2904      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-72 );
2905      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-64 );
2906      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-56 );
2907      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-48 );
2908      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-40 );
2909      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-32 );
2910      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-24 );
2911      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-16 );
2912      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP- 8 );
2913   } else {
2914      MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-112, 112 );
2915   }
2916}
2917
2918/*--------------- adjustment by 128 bytes ---------------*/
2919
2920MAYBE_USED
2921static void VG_REGPARM(2) mc_new_mem_stack_128_w_ECU(Addr new_SP, UInt ecu)
2922{
2923   UInt otag = ecu | MC_OKIND_STACK;
2924   PROF_EVENT(116, "new_mem_stack_128");
2925   if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2926      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP   , otag );
2927      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+8 , otag );
2928      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+16, otag );
2929      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+24, otag );
2930      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+32, otag );
2931      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+40, otag );
2932      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+48, otag );
2933      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+56, otag );
2934      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+64, otag );
2935      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+72, otag );
2936      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+80, otag );
2937      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+88, otag );
2938      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+96, otag );
2939      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+104, otag );
2940      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+112, otag );
2941      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+120, otag );
2942   } else {
2943      MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 128, otag );
2944   }
2945}
2946
2947MAYBE_USED
2948static void VG_REGPARM(1) mc_new_mem_stack_128(Addr new_SP)
2949{
2950   PROF_EVENT(116, "new_mem_stack_128");
2951   if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2952      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
2953      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+8 );
2954      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+16 );
2955      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+24 );
2956      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+32 );
2957      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+40 );
2958      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+48 );
2959      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+56 );
2960      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+64 );
2961      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+72 );
2962      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+80 );
2963      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+88 );
2964      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+96 );
2965      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+104 );
2966      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+112 );
2967      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+120 );
2968   } else {
2969      make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 128 );
2970   }
2971}
2972
2973MAYBE_USED
2974static void VG_REGPARM(1) mc_die_mem_stack_128(Addr new_SP)
2975{
2976   PROF_EVENT(126, "die_mem_stack_128");
2977   if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2978      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-128);
2979      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-120);
2980      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-112);
2981      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-104);
2982      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-96 );
2983      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-88 );
2984      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-80 );
2985      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-72 );
2986      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-64 );
2987      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-56 );
2988      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-48 );
2989      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-40 );
2990      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-32 );
2991      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-24 );
2992      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-16 );
2993      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP- 8 );
2994   } else {
2995      MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-128, 128 );
2996   }
2997}
2998
2999/*--------------- adjustment by 144 bytes ---------------*/
3000
3001MAYBE_USED
3002static void VG_REGPARM(2) mc_new_mem_stack_144_w_ECU(Addr new_SP, UInt ecu)
3003{
3004   UInt otag = ecu | MC_OKIND_STACK;
3005   PROF_EVENT(117, "new_mem_stack_144");
3006   if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3007      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP,     otag );
3008      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+8,   otag );
3009      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+16,  otag );
3010      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+24,  otag );
3011      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+32,  otag );
3012      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+40,  otag );
3013      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+48,  otag );
3014      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+56,  otag );
3015      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+64,  otag );
3016      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+72,  otag );
3017      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+80,  otag );
3018      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+88,  otag );
3019      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+96,  otag );
3020      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+104, otag );
3021      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+112, otag );
3022      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+120, otag );
3023      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+128, otag );
3024      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+136, otag );
3025   } else {
3026      MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 144, otag );
3027   }
3028}
3029
3030MAYBE_USED
3031static void VG_REGPARM(1) mc_new_mem_stack_144(Addr new_SP)
3032{
3033   PROF_EVENT(117, "new_mem_stack_144");
3034   if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3035      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
3036      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+8 );
3037      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+16 );
3038      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+24 );
3039      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+32 );
3040      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+40 );
3041      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+48 );
3042      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+56 );
3043      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+64 );
3044      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+72 );
3045      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+80 );
3046      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+88 );
3047      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+96 );
3048      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+104 );
3049      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+112 );
3050      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+120 );
3051      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+128 );
3052      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+136 );
3053   } else {
3054      make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 144 );
3055   }
3056}
3057
3058MAYBE_USED
3059static void VG_REGPARM(1) mc_die_mem_stack_144(Addr new_SP)
3060{
3061   PROF_EVENT(127, "die_mem_stack_144");
3062   if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3063      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-144);
3064      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-136);
3065      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-128);
3066      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-120);
3067      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-112);
3068      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-104);
3069      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-96 );
3070      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-88 );
3071      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-80 );
3072      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-72 );
3073      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-64 );
3074      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-56 );
3075      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-48 );
3076      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-40 );
3077      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-32 );
3078      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-24 );
3079      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-16 );
3080      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP- 8 );
3081   } else {
3082      MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-144, 144 );
3083   }
3084}
3085
3086/*--------------- adjustment by 160 bytes ---------------*/
3087
3088MAYBE_USED
3089static void VG_REGPARM(2) mc_new_mem_stack_160_w_ECU(Addr new_SP, UInt ecu)
3090{
3091   UInt otag = ecu | MC_OKIND_STACK;
3092   PROF_EVENT(118, "new_mem_stack_160");
3093   if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3094      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP,     otag );
3095      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+8,   otag );
3096      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+16,  otag );
3097      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+24,  otag );
3098      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+32,  otag );
3099      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+40,  otag );
3100      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+48,  otag );
3101      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+56,  otag );
3102      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+64,  otag );
3103      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+72,  otag );
3104      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+80,  otag );
3105      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+88,  otag );
3106      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+96,  otag );
3107      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+104, otag );
3108      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+112, otag );
3109      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+120, otag );
3110      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+128, otag );
3111      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+136, otag );
3112      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+144, otag );
3113      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+152, otag );
3114   } else {
3115      MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 160, otag );
3116   }
3117}
3118
3119MAYBE_USED
3120static void VG_REGPARM(1) mc_new_mem_stack_160(Addr new_SP)
3121{
3122   PROF_EVENT(118, "new_mem_stack_160");
3123   if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3124      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
3125      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+8 );
3126      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+16 );
3127      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+24 );
3128      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+32 );
3129      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+40 );
3130      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+48 );
3131      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+56 );
3132      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+64 );
3133      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+72 );
3134      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+80 );
3135      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+88 );
3136      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+96 );
3137      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+104 );
3138      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+112 );
3139      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+120 );
3140      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+128 );
3141      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+136 );
3142      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+144 );
3143      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+152 );
3144   } else {
3145      make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 160 );
3146   }
3147}
3148
3149MAYBE_USED
3150static void VG_REGPARM(1) mc_die_mem_stack_160(Addr new_SP)
3151{
3152   PROF_EVENT(128, "die_mem_stack_160");
3153   if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3154      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-160);
3155      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-152);
3156      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-144);
3157      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-136);
3158      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-128);
3159      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-120);
3160      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-112);
3161      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-104);
3162      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-96 );
3163      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-88 );
3164      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-80 );
3165      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-72 );
3166      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-64 );
3167      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-56 );
3168      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-48 );
3169      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-40 );
3170      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-32 );
3171      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-24 );
3172      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-16 );
3173      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP- 8 );
3174   } else {
3175      MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-160, 160 );
3176   }
3177}
3178
3179/*--------------- adjustment by N bytes ---------------*/
3180
3181static void mc_new_mem_stack_w_ECU ( Addr a, SizeT len, UInt ecu )
3182{
3183   UInt otag = ecu | MC_OKIND_STACK;
3184   PROF_EVENT(115, "new_mem_stack_w_otag");
3185   MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + a, len, otag );
3186}
3187
3188static void mc_new_mem_stack ( Addr a, SizeT len )
3189{
3190   PROF_EVENT(115, "new_mem_stack");
3191   make_mem_undefined ( -VG_STACK_REDZONE_SZB + a, len );
3192}
3193
3194static void mc_die_mem_stack ( Addr a, SizeT len )
3195{
3196   PROF_EVENT(125, "die_mem_stack");
3197   MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + a, len );
3198}
3199
3200
3201/* The AMD64 ABI says:
3202
3203   "The 128-byte area beyond the location pointed to by %rsp is considered
3204    to be reserved and shall not be modified by signal or interrupt
3205    handlers.  Therefore, functions may use this area for temporary data
3206    that is not needed across function calls.  In particular, leaf functions
3207    may use this area for their entire stack frame, rather than adjusting
3208    the stack pointer in the prologue and epilogue.  This area is known as
3209    red zone [sic]."
3210
3211   So after any call or return we need to mark this redzone as containing
3212   undefined values.
3213
3214   Consider this:  we're in function f.  f calls g.  g moves rsp down
3215   modestly (say 16 bytes) and writes stuff all over the red zone, making it
3216   defined.  g returns.  f is buggy and reads from parts of the red zone
3217   that it didn't write on.  But because g filled that area in, f is going
3218   to be picking up defined V bits and so any errors from reading bits of
3219   the red zone it didn't write will be missed.  The only solution I could
3220   think of was to make the red zone undefined when g returns to f.
3221
3222   This is in accordance with the ABI, which makes it clear the redzone
3223   is volatile across function calls.
3224
3225   The problem occurs the other way round too: f could fill the RZ up
3226   with defined values and g could mistakenly read them.  So the RZ
3227   also needs to be nuked on function calls.
3228*/
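
/* To make that concrete (an illustrative sketch, not real code):

      1. f calls g.  g runs with a slightly lower %rsp and stores scratch
         data over the bytes that will be f's red zone after the return,
         leaving those bytes marked "defined".
      2. g returns.  f, using its red zone leaf-style, reads a slot in it
         that f itself never wrote -- a genuine bug.

   Unless the red zone is re-marked undefined on the return (and on the
   call, for the converse case), the read in step 2 is silently accepted.
   That re-marking is the job of MC_(helperc_MAKE_STACK_UNINIT) further
   below; presumably its dedicated len == 128 fast path exists because
   the amd64 red zone is exactly 128 bytes. */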
3229
3230
3231/* A simple cache of nia (next instruction address) -> ECU mappings, with
3232   two entries per set.  It could be improved so as to have a lower miss rate. */
3233
3234static UWord stats__nia_cache_queries = 0;
3235static UWord stats__nia_cache_misses  = 0;
3236
3237typedef
3238   struct { UWord nia0; UWord ecu0;   /* nia0 maps to ecu0 */
3239            UWord nia1; UWord ecu1; } /* nia1 maps to ecu1 */
3240   WCacheEnt;
3241
3242#define N_NIA_TO_ECU_CACHE 511
3243
3244static WCacheEnt nia_to_ecu_cache[N_NIA_TO_ECU_CACHE];
3245
3246static void init_nia_to_ecu_cache ( void )
3247{
3248   UWord       i;
3249   Addr        zero_addr = 0;
3250   ExeContext* zero_ec;
3251   UInt        zero_ecu;
3252   /* Fill all the slots with an entry for address zero and its
3253      corresponding ECU, so that the cache is initially filled
3254      with valid data. */
3255   zero_ec = VG_(make_depth_1_ExeContext_from_Addr)(zero_addr);
3256   tl_assert(zero_ec);
3257   zero_ecu = VG_(get_ECU_from_ExeContext)(zero_ec);
3258   tl_assert(VG_(is_plausible_ECU)(zero_ecu));
3259   for (i = 0; i < N_NIA_TO_ECU_CACHE; i++) {
3260      nia_to_ecu_cache[i].nia0 = zero_addr;
3261      nia_to_ecu_cache[i].ecu0 = zero_ecu;
3262      nia_to_ecu_cache[i].nia1 = zero_addr;
3263      nia_to_ecu_cache[i].ecu1 = zero_ecu;
3264   }
3265}
3266
3267static inline UInt convert_nia_to_ecu ( Addr nia )
3268{
3269   UWord i;
3270   UInt        ecu;
3271   ExeContext* ec;
3272
3273   tl_assert( sizeof(nia_to_ecu_cache[0].nia1) == sizeof(nia) );
3274
3275   stats__nia_cache_queries++;
3276   i = nia % N_NIA_TO_ECU_CACHE;
3277   tl_assert(i >= 0 && i < N_NIA_TO_ECU_CACHE);
3278
3279   if (LIKELY( nia_to_ecu_cache[i].nia0 == nia ))
3280      return nia_to_ecu_cache[i].ecu0;
3281
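   /* Hit in the second slot: swap the two entries so the most recently
      used mapping ends up in slot 0. */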
3282   if (LIKELY( nia_to_ecu_cache[i].nia1 == nia )) {
3283#     define SWAP(_w1,_w2) { UWord _t = _w1; _w1 = _w2; _w2 = _t; }
3284      SWAP( nia_to_ecu_cache[i].nia0, nia_to_ecu_cache[i].nia1 );
3285      SWAP( nia_to_ecu_cache[i].ecu0, nia_to_ecu_cache[i].ecu1 );
3286#     undef SWAP
3287      return nia_to_ecu_cache[i].ecu0;
3288   }
3289
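   /* Miss: compute the ECU for nia, demote the slot-0 entry to slot 1,
      and install the new mapping in slot 0. */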
3290   stats__nia_cache_misses++;
3291   ec = VG_(make_depth_1_ExeContext_from_Addr)(nia);
3292   tl_assert(ec);
3293   ecu = VG_(get_ECU_from_ExeContext)(ec);
3294   tl_assert(VG_(is_plausible_ECU)(ecu));
3295
3296   nia_to_ecu_cache[i].nia1 = nia_to_ecu_cache[i].nia0;
3297   nia_to_ecu_cache[i].ecu1 = nia_to_ecu_cache[i].ecu0;
3298
3299   nia_to_ecu_cache[i].nia0 = nia;
3300   nia_to_ecu_cache[i].ecu0 = (UWord)ecu;
3301   return ecu;
3302}
3303
3304
3305/* Note that this serves both the origin-tracking and
3306   no-origin-tracking modes.  We assume that calls to it are
3307   sufficiently infrequent that it isn't worth specialising for the
3308   with/without origin-tracking cases. */
3309void MC_(helperc_MAKE_STACK_UNINIT) ( Addr base, UWord len, Addr nia )
3310{
3311   UInt otag;
3312   tl_assert(sizeof(UWord) == sizeof(SizeT));
3313   if (0)
3314      VG_(printf)("helperc_MAKE_STACK_UNINIT (%#lx,%lu,nia=%#lx)\n",
3315                  base, len, nia );
3316
3317   if (UNLIKELY( MC_(clo_mc_level) == 3 )) {
3318      UInt ecu = convert_nia_to_ecu ( nia );
3319      tl_assert(VG_(is_plausible_ECU)(ecu));
3320      otag = ecu | MC_OKIND_STACK;
3321   } else {
3322      tl_assert(nia == 0);
3323      otag = 0;
3324   }
3325
3326#  if 0
3327   /* Really slow version */
3328   MC_(make_mem_undefined)(base, len, otag);
3329#  endif
3330
3331#  if 0
3332   /* Slow(ish) version, which is fairly easily seen to be correct.
3333   */
3334   if (LIKELY( VG_IS_8_ALIGNED(base) && len==128 )) {
3335      make_aligned_word64_undefined(base +   0, otag);
3336      make_aligned_word64_undefined(base +   8, otag);
3337      make_aligned_word64_undefined(base +  16, otag);
3338      make_aligned_word64_undefined(base +  24, otag);
3339
3340      make_aligned_word64_undefined(base +  32, otag);
3341      make_aligned_word64_undefined(base +  40, otag);
3342      make_aligned_word64_undefined(base +  48, otag);
3343      make_aligned_word64_undefined(base +  56, otag);
3344
3345      make_aligned_word64_undefined(base +  64, otag);
3346      make_aligned_word64_undefined(base +  72, otag);
3347      make_aligned_word64_undefined(base +  80, otag);
3348      make_aligned_word64_undefined(base +  88, otag);
3349
3350      make_aligned_word64_undefined(base +  96, otag);
3351      make_aligned_word64_undefined(base + 104, otag);
3352      make_aligned_word64_undefined(base + 112, otag);
3353      make_aligned_word64_undefined(base + 120, otag);
3354   } else {
3355      MC_(make_mem_undefined)(base, len, otag);
3356   }
3357#  endif
3358
3359   /* Idea is: go fast when
3360         * 8-aligned and length is 128
3361         * the sm is available in the main primary map
3362         * the address range falls entirely within a single secondary map
3363      If all those conditions hold, just update the V+A bits by writing
3364      directly into the vabits array.  (If the sm was distinguished, this
3365      will make a copy and then write to it.)
3366   */
3367
3368   if (LIKELY( len == 128 && VG_IS_8_ALIGNED(base) )) {
3369      /* Now we know the address range is suitably sized and aligned. */
3370      UWord a_lo = (UWord)(base);
3371      UWord a_hi = (UWord)(base + 128 - 1);
3372      tl_assert(a_lo < a_hi);             // paranoia: detect overflow
3373      if (a_hi <= MAX_PRIMARY_ADDRESS) {
3374         // Now we know the entire range is within the main primary map.
3375         SecMap* sm    = get_secmap_for_writing_low(a_lo);
3376         SecMap* sm_hi = get_secmap_for_writing_low(a_hi);
3377         /* Now we know that the entire address range falls within a
3378            single secondary map, and that that secondary 'lives' in
3379            the main primary map. */
3380         if (LIKELY(sm == sm_hi)) {
3381            // Finally, we know that the range is entirely within one secmap.
3382            UWord   v_off = SM_OFF(a_lo);
3383            UShort* p     = (UShort*)(&sm->vabits8[v_off]);
3384            p[ 0] = VA_BITS16_UNDEFINED;
3385            p[ 1] = VA_BITS16_UNDEFINED;
3386            p[ 2] = VA_BITS16_UNDEFINED;
3387            p[ 3] = VA_BITS16_UNDEFINED;
3388            p[ 4] = VA_BITS16_UNDEFINED;
3389            p[ 5] = VA_BITS16_UNDEFINED;
3390            p[ 6] = VA_BITS16_UNDEFINED;
3391            p[ 7] = VA_BITS16_UNDEFINED;
3392            p[ 8] = VA_BITS16_UNDEFINED;
3393            p[ 9] = VA_BITS16_UNDEFINED;
3394            p[10] = VA_BITS16_UNDEFINED;
3395            p[11] = VA_BITS16_UNDEFINED;
3396            p[12] = VA_BITS16_UNDEFINED;
3397            p[13] = VA_BITS16_UNDEFINED;
3398            p[14] = VA_BITS16_UNDEFINED;
3399            p[15] = VA_BITS16_UNDEFINED;
3400            if (UNLIKELY( MC_(clo_mc_level) == 3 )) {
3401               set_aligned_word64_Origin_to_undef( base + 8 * 0, otag );
3402               set_aligned_word64_Origin_to_undef( base + 8 * 1, otag );
3403               set_aligned_word64_Origin_to_undef( base + 8 * 2, otag );
3404               set_aligned_word64_Origin_to_undef( base + 8 * 3, otag );
3405               set_aligned_word64_Origin_to_undef( base + 8 * 4, otag );
3406               set_aligned_word64_Origin_to_undef( base + 8 * 5, otag );
3407               set_aligned_word64_Origin_to_undef( base + 8 * 6, otag );
3408               set_aligned_word64_Origin_to_undef( base + 8 * 7, otag );
3409               set_aligned_word64_Origin_to_undef( base + 8 * 8, otag );
3410               set_aligned_word64_Origin_to_undef( base + 8 * 9, otag );
3411               set_aligned_word64_Origin_to_undef( base + 8 * 10, otag );
3412               set_aligned_word64_Origin_to_undef( base + 8 * 11, otag );
3413               set_aligned_word64_Origin_to_undef( base + 8 * 12, otag );
3414               set_aligned_word64_Origin_to_undef( base + 8 * 13, otag );
3415               set_aligned_word64_Origin_to_undef( base + 8 * 14, otag );
3416               set_aligned_word64_Origin_to_undef( base + 8 * 15, otag );
3417            }
3418            return;
3419         }
3420      }
3421   }
3422
3423   /* 288 bytes (36 ULongs) is the magic value for ELF ppc64. */
3424   if (LIKELY( len == 288 && VG_IS_8_ALIGNED(base) )) {
3425      /* Now we know the address range is suitably sized and aligned. */
3426      UWord a_lo = (UWord)(base);
3427      UWord a_hi = (UWord)(base + 288 - 1);
3428      tl_assert(a_lo < a_hi);             // paranoia: detect overflow
3429      if (a_hi <= MAX_PRIMARY_ADDRESS) {
3430         // Now we know the entire range is within the main primary map.
3431         SecMap* sm    = get_secmap_for_writing_low(a_lo);
3432         SecMap* sm_hi = get_secmap_for_writing_low(a_hi);
3433         /* Now we know that the entire address range falls within a
3434            single secondary map, and that that secondary 'lives' in
3435            the main primary map. */
3436         if (LIKELY(sm == sm_hi)) {
3437            // Finally, we know that the range is entirely within one secmap.
3438            UWord   v_off = SM_OFF(a_lo);
3439            UShort* p     = (UShort*)(&sm->vabits8[v_off]);
3440            p[ 0] = VA_BITS16_UNDEFINED;
3441            p[ 1] = VA_BITS16_UNDEFINED;
3442            p[ 2] = VA_BITS16_UNDEFINED;
3443            p[ 3] = VA_BITS16_UNDEFINED;
3444            p[ 4] = VA_BITS16_UNDEFINED;
3445            p[ 5] = VA_BITS16_UNDEFINED;
3446            p[ 6] = VA_BITS16_UNDEFINED;
3447            p[ 7] = VA_BITS16_UNDEFINED;
3448            p[ 8] = VA_BITS16_UNDEFINED;
3449            p[ 9] = VA_BITS16_UNDEFINED;
3450            p[10] = VA_BITS16_UNDEFINED;
3451            p[11] = VA_BITS16_UNDEFINED;
3452            p[12] = VA_BITS16_UNDEFINED;
3453            p[13] = VA_BITS16_UNDEFINED;
3454            p[14] = VA_BITS16_UNDEFINED;
3455            p[15] = VA_BITS16_UNDEFINED;
3456            p[16] = VA_BITS16_UNDEFINED;
3457            p[17] = VA_BITS16_UNDEFINED;
3458            p[18] = VA_BITS16_UNDEFINED;
3459            p[19] = VA_BITS16_UNDEFINED;
3460            p[20] = VA_BITS16_UNDEFINED;
3461            p[21] = VA_BITS16_UNDEFINED;
3462            p[22] = VA_BITS16_UNDEFINED;
3463            p[23] = VA_BITS16_UNDEFINED;
3464            p[24] = VA_BITS16_UNDEFINED;
3465            p[25] = VA_BITS16_UNDEFINED;
3466            p[26] = VA_BITS16_UNDEFINED;
3467            p[27] = VA_BITS16_UNDEFINED;
3468            p[28] = VA_BITS16_UNDEFINED;
3469            p[29] = VA_BITS16_UNDEFINED;
3470            p[30] = VA_BITS16_UNDEFINED;
3471            p[31] = VA_BITS16_UNDEFINED;
3472            p[32] = VA_BITS16_UNDEFINED;
3473            p[33] = VA_BITS16_UNDEFINED;
3474            p[34] = VA_BITS16_UNDEFINED;
3475            p[35] = VA_BITS16_UNDEFINED;
3476            if (UNLIKELY( MC_(clo_mc_level) == 3 )) {
3477               set_aligned_word64_Origin_to_undef( base + 8 * 0, otag );
3478               set_aligned_word64_Origin_to_undef( base + 8 * 1, otag );
3479               set_aligned_word64_Origin_to_undef( base + 8 * 2, otag );
3480               set_aligned_word64_Origin_to_undef( base + 8 * 3, otag );
3481               set_aligned_word64_Origin_to_undef( base + 8 * 4, otag );
3482               set_aligned_word64_Origin_to_undef( base + 8 * 5, otag );
3483               set_aligned_word64_Origin_to_undef( base + 8 * 6, otag );
3484               set_aligned_word64_Origin_to_undef( base + 8 * 7, otag );
3485               set_aligned_word64_Origin_to_undef( base + 8 * 8, otag );
3486               set_aligned_word64_Origin_to_undef( base + 8 * 9, otag );
3487               set_aligned_word64_Origin_to_undef( base + 8 * 10, otag );
3488               set_aligned_word64_Origin_to_undef( base + 8 * 11, otag );
3489               set_aligned_word64_Origin_to_undef( base + 8 * 12, otag );
3490               set_aligned_word64_Origin_to_undef( base + 8 * 13, otag );
3491               set_aligned_word64_Origin_to_undef( base + 8 * 14, otag );
3492               set_aligned_word64_Origin_to_undef( base + 8 * 15, otag );
3493               set_aligned_word64_Origin_to_undef( base + 8 * 16, otag );
3494               set_aligned_word64_Origin_to_undef( base + 8 * 17, otag );
3495               set_aligned_word64_Origin_to_undef( base + 8 * 18, otag );
3496               set_aligned_word64_Origin_to_undef( base + 8 * 19, otag );
3497               set_aligned_word64_Origin_to_undef( base + 8 * 20, otag );
3498               set_aligned_word64_Origin_to_undef( base + 8 * 21, otag );
3499               set_aligned_word64_Origin_to_undef( base + 8 * 22, otag );
3500               set_aligned_word64_Origin_to_undef( base + 8 * 23, otag );
3501               set_aligned_word64_Origin_to_undef( base + 8 * 24, otag );
3502               set_aligned_word64_Origin_to_undef( base + 8 * 25, otag );
3503               set_aligned_word64_Origin_to_undef( base + 8 * 26, otag );
3504               set_aligned_word64_Origin_to_undef( base + 8 * 27, otag );
3505               set_aligned_word64_Origin_to_undef( base + 8 * 28, otag );
3506               set_aligned_word64_Origin_to_undef( base + 8 * 29, otag );
3507               set_aligned_word64_Origin_to_undef( base + 8 * 30, otag );
3508               set_aligned_word64_Origin_to_undef( base + 8 * 31, otag );
3509               set_aligned_word64_Origin_to_undef( base + 8 * 32, otag );
3510               set_aligned_word64_Origin_to_undef( base + 8 * 33, otag );
3511               set_aligned_word64_Origin_to_undef( base + 8 * 34, otag );
3512               set_aligned_word64_Origin_to_undef( base + 8 * 35, otag );
3513            }
3514            return;
3515         }
3516      }
3517   }
3518
3519   /* else fall into slow case */
3520   MC_(make_mem_undefined_w_otag)(base, len, otag);
3521}
3522
3523
3524/*------------------------------------------------------------*/
3525/*--- Checking memory                                      ---*/
3526/*------------------------------------------------------------*/
3527
3528typedef
3529   enum {
3530      MC_Ok = 5,
3531      MC_AddrErr = 6,
3532      MC_ValueErr = 7
3533   }
3534   MC_ReadResult;
3535
3536
3537/* Check permissions for address range.  If inadequate permissions
3538   exist, *bad_addr is set to the offending address, so the caller can
3539   know what it is. */
3540
3541/* Returns True if [a .. a+len) is not addressable.  Otherwise,
3542   returns False, and if bad_addr is non-NULL, sets *bad_addr to
3543   indicate the lowest failing address.  Functions below are
3544   similar. */
3545Bool MC_(check_mem_is_noaccess) ( Addr a, SizeT len, Addr* bad_addr )
3546{
3547   SizeT i;
3548   UWord vabits2;
3549
3550   PROF_EVENT(60, "check_mem_is_noaccess");
3551   for (i = 0; i < len; i++) {
3552      PROF_EVENT(61, "check_mem_is_noaccess(loop)");
3553      vabits2 = get_vabits2(a);
3554      if (VA_BITS2_NOACCESS != vabits2) {
3555         if (bad_addr != NULL) *bad_addr = a;
3556         return False;
3557      }
3558      a++;
3559   }
3560   return True;
3561}
3562
3563static Bool is_mem_addressable ( Addr a, SizeT len,
3564                                 /*OUT*/Addr* bad_addr )
3565{
3566   SizeT i;
3567   UWord vabits2;
3568
3569   PROF_EVENT(62, "is_mem_addressable");
3570   for (i = 0; i < len; i++) {
3571      PROF_EVENT(63, "is_mem_addressable(loop)");
3572      vabits2 = get_vabits2(a);
3573      if (VA_BITS2_NOACCESS == vabits2) {
3574         if (bad_addr != NULL) *bad_addr = a;
3575         return False;
3576      }
3577      a++;
3578   }
3579   return True;
3580}
3581
3582static MC_ReadResult is_mem_defined ( Addr a, SizeT len,
3583                                      /*OUT*/Addr* bad_addr,
3584                                      /*OUT*/UInt* otag )
3585{
3586   SizeT i;
3587   UWord vabits2;
3588
3589   PROF_EVENT(64, "is_mem_defined");
3590   DEBUG("is_mem_defined\n");
3591
3592   if (otag)     *otag = 0;
3593   if (bad_addr) *bad_addr = 0;
3594   for (i = 0; i < len; i++) {
3595      PROF_EVENT(65, "is_mem_defined(loop)");
3596      vabits2 = get_vabits2(a);
3597      if (VA_BITS2_DEFINED != vabits2) {
3598         // Error!  Nb: Report addressability errors in preference to
3599         // definedness errors.  And don't report definedness errors unless
3600         // --undef-value-errors=yes.
3601         if (bad_addr) {
3602            *bad_addr = a;
3603         }
3604         if (VA_BITS2_NOACCESS == vabits2) {
3605            return MC_AddrErr;
3606         }
3607         if (MC_(clo_mc_level) >= 2) {
3608            if (otag && MC_(clo_mc_level) == 3) {
3609               *otag = MC_(helperc_b_load1)( a );
3610            }
3611            return MC_ValueErr;
3612         }
3613      }
3614      a++;
3615   }
3616   return MC_Ok;
3617}
3618
3619
3620/* Like is_mem_defined but doesn't give up at the first uninitialised
3621   byte -- the entire range is always checked.  This is important for
3622   detecting errors in the case where a checked range strays into
3623   invalid memory, but that fact is not detected by the ordinary
3624   is_mem_defined(), because of an undefined section that precedes the
3625   out-of-range section, possibly as a result of an alignment hole in
3626   the checked data.  This version always checks the entire range and
3627   can report both a definedness and an addressability error, if
3628   necessary. */
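
/* Example (illustrative only): suppose a 16-byte range is checked, bytes
   [4..7] are an uninitialised alignment hole, and bytes [12..15] lie in
   unaddressable memory.  Assuming definedness checking is enabled,
   is_mem_defined() gives up at byte 4 and reports only the definedness
   error; this routine carries on and also reports the more serious
   addressability error at byte 12. */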
3629static void is_mem_defined_comprehensive (
3630               Addr a, SizeT len,
3631               /*OUT*/Bool* errorV,    /* is there a definedness err? */
3632               /*OUT*/Addr* bad_addrV, /* if so where? */
3633               /*OUT*/UInt* otagV,     /* and what's its otag? */
3634               /*OUT*/Bool* errorA,    /* is there an addressability err? */
3635               /*OUT*/Addr* bad_addrA  /* if so where? */
3636            )
3637{
3638   SizeT i;
3639   UWord vabits2;
3640   Bool  already_saw_errV = False;
3641
3642   PROF_EVENT(64, "is_mem_defined"); // fixme
3643   DEBUG("is_mem_defined_comprehensive\n");
3644
3645   tl_assert(!(*errorV || *errorA));
3646
3647   for (i = 0; i < len; i++) {
3648      PROF_EVENT(65, "is_mem_defined(loop)"); // fixme
3649      vabits2 = get_vabits2(a);
3650      switch (vabits2) {
3651         case VA_BITS2_DEFINED:
3652            a++;
3653            break;
3654         case VA_BITS2_UNDEFINED:
3655         case VA_BITS2_PARTDEFINED:
3656            if (!already_saw_errV) {
3657               *errorV    = True;
3658               *bad_addrV = a;
3659               if (MC_(clo_mc_level) == 3) {
3660                  *otagV = MC_(helperc_b_load1)( a );
3661               } else {
3662                  *otagV = 0;
3663               }
3664               already_saw_errV = True;
3665            }
3666            a++; /* keep going */
3667            break;
3668         case VA_BITS2_NOACCESS:
3669            *errorA    = True;
3670            *bad_addrA = a;
3671            return; /* give up now. */
3672         default:
3673            tl_assert(0);
3674      }
3675   }
3676}
3677
3678
3679/* Check a zero-terminated ASCII string.  Tricky -- we don't want to
3680   examine the actual bytes to find the end until we're sure it is
3681   safe to do so. */
3682
3683static MC_ReadResult mc_is_defined_asciiz ( Addr a, Addr* bad_addr, UInt* otag )
3684{
3685   UWord vabits2;
3686
3687   PROF_EVENT(66, "mc_is_defined_asciiz");
3688   DEBUG("mc_is_defined_asciiz\n");
3689
3690   if (otag)     *otag = 0;
3691   if (bad_addr) *bad_addr = 0;
3692   while (True) {
3693      PROF_EVENT(67, "mc_is_defined_asciiz(loop)");
3694      vabits2 = get_vabits2(a);
3695      if (VA_BITS2_DEFINED != vabits2) {
3696         // Error!  Nb: Report addressability errors in preference to
3697         // definedness errors.  And don't report definedness errors unless
3698         // --undef-value-errors=yes.
3699         if (bad_addr) {
3700            *bad_addr = a;
3701         }
3702         if (VA_BITS2_NOACCESS == vabits2) {
3703            return MC_AddrErr;
3704         }
3705         if (MC_(clo_mc_level) >= 2) {
3706            if (otag && MC_(clo_mc_level) == 3) {
3707               *otag = MC_(helperc_b_load1)( a );
3708            }
3709            return MC_ValueErr;
3710         }
3711      }
3712      /* Ok, a is safe to read. */
3713      if (* ((UChar*)a) == 0) {
3714         return MC_Ok;
3715      }
3716      a++;
3717   }
3718}
3719
3720
3721/*------------------------------------------------------------*/
3722/*--- Memory event handlers                                ---*/
3723/*------------------------------------------------------------*/
3724
3725static
3726void check_mem_is_addressable ( CorePart part, ThreadId tid, Char* s,
3727                                Addr base, SizeT size )
3728{
3729   Addr bad_addr;
3730   Bool ok = is_mem_addressable ( base, size, &bad_addr );
3731
3732   if (!ok) {
3733      switch (part) {
3734      case Vg_CoreSysCall:
3735         MC_(record_memparam_error) ( tid, bad_addr,
3736                                      /*isAddrErr*/True, s, 0/*otag*/ );
3737         break;
3738
3739      case Vg_CoreSignal:
3740         MC_(record_core_mem_error)( tid, s );
3741         break;
3742
3743      default:
3744         VG_(tool_panic)("check_mem_is_addressable: unexpected CorePart");
3745      }
3746   }
3747}
3748
3749static
3750void check_mem_is_defined ( CorePart part, ThreadId tid, Char* s,
3751                            Addr base, SizeT size )
3752{
3753   UInt otag = 0;
3754   Addr bad_addr;
3755   MC_ReadResult res = is_mem_defined ( base, size, &bad_addr, &otag );
3756
3757   if (MC_Ok != res) {
3758      Bool isAddrErr = ( MC_AddrErr == res ? True : False );
3759
3760      switch (part) {
3761      case Vg_CoreSysCall:
3762         MC_(record_memparam_error) ( tid, bad_addr, isAddrErr, s,
3763                                      isAddrErr ? 0 : otag );
3764         break;
3765
3766      case Vg_CoreSysCallArgInMem:
3767         MC_(record_regparam_error) ( tid, s, otag );
3768         break;
3769
3770      /* If we're being asked to jump to a silly address, record an error
3771         message before potentially crashing the entire system. */
3772      case Vg_CoreTranslate:
3773         MC_(record_jump_error)( tid, bad_addr );
3774         break;
3775
3776      default:
3777         VG_(tool_panic)("check_mem_is_defined: unexpected CorePart");
3778      }
3779   }
3780}
3781
3782static
3783void check_mem_is_defined_asciiz ( CorePart part, ThreadId tid,
3784                                   Char* s, Addr str )
3785{
3786   MC_ReadResult res;
3787   Addr bad_addr = 0;   // shut GCC up
3788   UInt otag = 0;
3789
3790   tl_assert(part == Vg_CoreSysCall);
3791   res = mc_is_defined_asciiz ( (Addr)str, &bad_addr, &otag );
3792   if (MC_Ok != res) {
3793      Bool isAddrErr = ( MC_AddrErr == res ? True : False );
3794      MC_(record_memparam_error) ( tid, bad_addr, isAddrErr, s,
3795                                   isAddrErr ? 0 : otag );
3796   }
3797}
3798
3799/* Handling of mmap and mprotect is not as simple as it seems.
3800
3801   The underlying semantics are that memory obtained from mmap is
3802   always initialised, but may be inaccessible.  And changes to the
3803   protection of memory do not change its contents and hence not its
3804   definedness state.  The problem is that we can't model
3805   inaccessible-but-with-some-definedness state; once we mark memory
3806   as inaccessible we lose all info about definedness, and so can't
3807   restore that if it is later made accessible again.
3808
3809   One obvious thing to do is this:
3810
3811      mmap/mprotect NONE  -> noaccess
3812      mmap/mprotect other -> defined
3813
3814   The problem case here is: taking accessible memory, writing
3815   uninitialised data to it, mprotecting it NONE and later mprotecting
3816   it back to some accessible state causes the undefinedness to be
3817   lost.
3818
3819   A better proposal is:
3820
3821     (1) mmap NONE       ->  make noaccess
3822     (2) mmap other      ->  make defined
3823
3824     (3) mprotect NONE   ->  # no change
3825     (4) mprotect other  ->  change any "noaccess" to "defined"
3826
3827   (2) is OK because memory newly obtained from mmap really is defined
3828       (zeroed out by the kernel -- doing anything else would
3829       constitute a massive security hole.)
3830
3831   (1) is OK because the only way to make the memory usable is via
3832       (4), in which case we also wind up correctly marking it all as
3833       defined.
3834
3835   (3) is the weak case.  We choose not to change the memory state
3836       (presumably the range is in some mixture of "defined" and
3837       "undefined", viz, accessible but with arbitrary V bits).  Doing
3838       nothing means we retain the V bits, so that if the memory is
3839       later mprotected "other", the V bits remain unchanged, so there
3840       can be no false negatives.  The bad effect is that if there's
3841       an access in the area, then MC cannot warn; but at least we'll
3842       get a SEGV to show, so it's better than nothing.
3843
3844   Consider the sequence (3) followed by (4).  Any memory that was
3845   "defined" or "undefined" previously retains its state (as
3846   required).  Any memory that was "noaccess" before can only have
3847   been made that way by (1), and so it's OK to change it to
3848   "defined".
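
   A worked example of the scheme (illustrative only):

      mmap PROT_READ|PROT_WRITE   -> (2): the whole range becomes "defined"
      store uninitialised values  -> those bytes' V bits become undefined
      mprotect PROT_NONE          -> (3): no change; the V bits are retained
      mprotect PROT_READ          -> (4): only "noaccess" bytes (none here)
                                          are changed to "defined"
      read the stored values      -> still reported as undefined, as required

   Had (3) instead marked the range "noaccess", (4) would have had to turn
   it all "defined" and the undefinedness would have been lost.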
3849
3850   See https://bugs.kde.org/show_bug.cgi?id=205541
3851   and https://bugs.kde.org/show_bug.cgi?id=210268
3852*/
3853static
3854void mc_new_mem_mmap ( Addr a, SizeT len, Bool rr, Bool ww, Bool xx,
3855                       ULong di_handle )
3856{
3857   if (rr || ww || xx) {
3858      /* (2) mmap/mprotect other -> defined */
3859      MC_(make_mem_defined)(a, len);
3860   } else {
3861      /* (1) mmap/mprotect NONE  -> noaccess */
3862      MC_(make_mem_noaccess)(a, len);
3863   }
3864}
3865
3866static
3867void mc_new_mem_mprotect ( Addr a, SizeT len, Bool rr, Bool ww, Bool xx )
3868{
3869   if (rr || ww || xx) {
3870      /* (4) mprotect other  ->  change any "noaccess" to "defined" */
3871      make_mem_defined_if_noaccess(a, len);
3872   } else {
3873      /* (3) mprotect NONE   ->  # no change */
3874      /* do nothing */
3875   }
3876}
3877
3878
3879static
3880void mc_new_mem_startup( Addr a, SizeT len,
3881                         Bool rr, Bool ww, Bool xx, ULong di_handle )
3882{
3883   // Because code is defined, initialised variables get put in the data
3884   // segment and are defined, and uninitialised variables get put in the
3885   // bss segment and are auto-zeroed (and so defined).
3886   //
3887   // It's possible that there will be padding between global variables.
3888   // This will also be auto-zeroed, and marked as defined by Memcheck.  If
3889   // a program uses it, Memcheck will not complain.  This is arguably a
3890   // false negative, but it's a grey area -- the behaviour is defined (the
3891   // padding is zeroed) but it's probably not what the user intended.  And
3892   // we can't avoid it.
3893   //
3894   // Note: we generally ignore RWX permissions, because we can't track them
3895   // without requiring more than one A bit which would slow things down a
3896   // lot.  But on Darwin the 0th page is mapped but !R and !W and !X.
3897   // So we mark any such pages as "unaddressable".
3898   DEBUG("mc_new_mem_startup(%#lx, %llu, rr=%u, ww=%u, xx=%u)\n",
3899         a, (ULong)len, rr, ww, xx);
3900   mc_new_mem_mmap(a, len, rr, ww, xx, di_handle);
3901}
3902
3903static
3904void mc_post_mem_write(CorePart part, ThreadId tid, Addr a, SizeT len)
3905{
3906   MC_(make_mem_defined)(a, len);
3907}
3908
3909
3910/*------------------------------------------------------------*/
3911/*--- Register event handlers                              ---*/
3912/*------------------------------------------------------------*/
3913
3914/* Try to get a nonzero origin for the guest state section of thread
3915   tid characterised by (offset,size).  Return 0 if nothing to show
3916   for it. */
3917static UInt mb_get_origin_for_guest_offset ( ThreadId tid,
3918                                             Int offset, SizeT size )
3919{
3920   Int   sh2off;
3921   UInt  area[3];
3922   UInt  otag;
3923   sh2off = MC_(get_otrack_shadow_offset)( offset, size );
3924   if (sh2off == -1)
3925      return 0;  /* This piece of guest state is not tracked */
3926   tl_assert(sh2off >= 0);
3927   tl_assert(0 == (sh2off % 4));
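   /* area[0] and area[2] act as canaries around the 4 bytes that
      VG_(get_shadow_regs_area) writes into area[1]; the asserts below
      check that nothing outside that slot was written. */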
3928   area[0] = 0x31313131;
3929   area[2] = 0x27272727;
3930   VG_(get_shadow_regs_area)( tid, (UChar *)&area[1], 2/*shadowno*/,sh2off,4 );
3931   tl_assert(area[0] == 0x31313131);
3932   tl_assert(area[2] == 0x27272727);
3933   otag = area[1];
3934   return otag;
3935}
3936
3937
3938/* When some chunk of guest state is written, mark the corresponding
3939   shadow area as valid.  This is used to initialise arbitrarily large
3940   chunks of guest state, hence the MAX_REG_WRITE_SIZE value below, which
3941   has to be as big as the biggest guest state write.
3942*/
3943static void mc_post_reg_write ( CorePart part, ThreadId tid,
3944                                PtrdiffT offset, SizeT size)
3945{
3946#  define MAX_REG_WRITE_SIZE 1696
3947   UChar area[MAX_REG_WRITE_SIZE];
3948   tl_assert(size <= MAX_REG_WRITE_SIZE);
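   /* Fill a local buffer with "defined" V bits and copy it over the
      V-bit shadow (shadow area 1) of the range just written. */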
3949   VG_(memset)(area, V_BITS8_DEFINED, size);
3950   VG_(set_shadow_regs_area)( tid, 1/*shadowNo*/,offset,size, area );
3951#  undef MAX_REG_WRITE_SIZE
3952}
3953
3954static
3955void mc_post_reg_write_clientcall ( ThreadId tid,
3956                                    PtrdiffT offset, SizeT size, Addr f)
3957{
3958   mc_post_reg_write(/*dummy*/0, tid, offset, size);
3959}
3960
3961/* Look at the definedness of the guest's shadow state for
3962   [offset, offset+len).  If any part of that is undefined, record
3963   a parameter error.
3964*/
3965static void mc_pre_reg_read ( CorePart part, ThreadId tid, Char* s,
3966                              PtrdiffT offset, SizeT size)
3967{
3968   Int   i;
3969   Bool  bad;
3970   UInt  otag;
3971
3972   UChar area[16];
3973   tl_assert(size <= 16);
3974
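   /* Read the V-bit shadow (shadow area 1) for the register range and
      scan it for any byte that is not fully defined. */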
3975   VG_(get_shadow_regs_area)( tid, area, 1/*shadowNo*/,offset,size );
3976
3977   bad = False;
3978   for (i = 0; i < size; i++) {
3979      if (area[i] != V_BITS8_DEFINED) {
3980         bad = True;
3981         break;
3982      }
3983   }
3984
3985   if (!bad)
3986      return;
3987
3988   /* We've found some undefinedness.  See if we can also find an
3989      origin for it. */
3990   otag = mb_get_origin_for_guest_offset( tid, offset, size );
3991   MC_(record_regparam_error) ( tid, s, otag );
3992}
3993
3994
3995/*------------------------------------------------------------*/
3996/*--- Functions called directly from generated code:       ---*/
3997/*--- Load/store handlers.                                 ---*/
3998/*------------------------------------------------------------*/
3999
4000/* Types:  LOADV32, LOADV16, LOADV8 are:
4001               UWord fn ( Addr a )
4002   so they return 32-bits on 32-bit machines and 64-bits on
4003   64-bit machines.  Addr has the same size as a host word.
4004
4005   LOADV64 is always  ULong fn ( Addr a )
4006
4007   Similarly for STOREV8, STOREV16, STOREV32, the supplied vbits
4008   are a UWord, and for STOREV64 they are a ULong.
4009*/
4010
4011/* If any part of '_a' indicated by the mask is 1, either '_a' is not
4012   naturally '_sz/8'-aligned, or it exceeds the range covered by the
4013   primary map.  This is all very tricky (and important!), so let's
4014   work through the maths by hand (below), *and* assert for these
4015   values at startup. */
4016#define MASK(_szInBytes) \
4017   ( ~((0x10000UL-(_szInBytes)) | ((N_PRIMARY_MAP-1) << 16)) )
4018
4019/* MASK only exists so as to define this macro. */
4020#define UNALIGNED_OR_HIGH(_a,_szInBits) \
4021   ((_a) & MASK((_szInBits>>3)))
4022
4023/* On a 32-bit machine:
4024
4025   N_PRIMARY_BITS          == 16, so
4026   N_PRIMARY_MAP           == 0x10000, so
4027   N_PRIMARY_MAP-1         == 0xFFFF, so
4028   (N_PRIMARY_MAP-1) << 16 == 0xFFFF0000, and so
4029
4030   MASK(1) = ~ ( (0x10000 - 1) | 0xFFFF0000 )
4031           = ~ ( 0xFFFF | 0xFFFF0000 )
4032           = ~ 0xFFFF'FFFF
4033           = 0
4034
4035   MASK(2) = ~ ( (0x10000 - 2) | 0xFFFF0000 )
4036           = ~ ( 0xFFFE | 0xFFFF0000 )
4037           = ~ 0xFFFF'FFFE
4038           = 1
4039
4040   MASK(4) = ~ ( (0x10000 - 4) | 0xFFFF0000 )
4041           = ~ ( 0xFFFC | 0xFFFF0000 )
4042           = ~ 0xFFFF'FFFC
4043           = 3
4044
4045   MASK(8) = ~ ( (0x10000 - 8) | 0xFFFF0000 )
4046           = ~ ( 0xFFF8 | 0xFFFF0000 )
4047           = ~ 0xFFFF'FFF8
4048           = 7
4049
4050   Hence in the 32-bit case, "a & MASK(1/2/4/8)" is a nonzero value
4051   precisely when a is not 1/2/4/8-bytes aligned.  And obviously, for
4052   the 1-byte alignment case, it is always a zero value, since MASK(1)
4053   is zero.  All as expected.
4054
4055   On a 64-bit machine, it's more complex, since we're testing
4056   simultaneously for misalignment and for the address being at or
4057   above 32G:
4058
4059   N_PRIMARY_BITS          == 19, so
4060   N_PRIMARY_MAP           == 0x80000, so
4061   N_PRIMARY_MAP-1         == 0x7FFFF, so
4062   (N_PRIMARY_MAP-1) << 16 == 0x7FFFF'0000, and so
4063
4064   MASK(1) = ~ ( (0x10000 - 1) | 0x7FFFF'0000 )
4065           = ~ ( 0xFFFF | 0x7FFFF'0000 )
4066           = ~ 0x7FFFF'FFFF
4067           = 0xFFFF'FFF8'0000'0000
4068
4069   MASK(2) = ~ ( (0x10000 - 2) | 0x7FFFF'0000 )
4070           = ~ ( 0xFFFE | 0x7FFFF'0000 )
4071           = ~ 0x7FFFF'FFFE
4072           = 0xFFFF'FFF8'0000'0001
4073
4074   MASK(4) = ~ ( (0x10000 - 4) | 0x7FFFF'0000 )
4075           = ~ ( 0xFFFC | 0x7FFFF'0000 )
4076           = ~ 0x7FFFF'FFFC
4077           = 0xFFFF'FFF8'0000'0003
4078
4079   MASK(8) = ~ ( (0x10000 - 8) | 0x7FFFF'0000 )
4080           = ~ ( 0xFFF8 | 0x7FFFF'0000 )
4081           = ~ 0x7FFFF'FFF8
4082           = 0xFFFF'FFF8'0000'0007
4083*/
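
/* A minimal, self-contained sketch (not part of the tool) that
   re-checks the 32-bit arithmetic worked through above.  The names
   MASK32 and UNALIGNED32 are illustrative only and exist nowhere else
   in Memcheck; the expected values are exactly those derived by hand
   in the preceding comment. */
#if 0
#include <assert.h>
#include <stdint.h>

#define N_PRIMARY_MAP_32   0x10000u      /* 2^16 secondaries, as above */
#define MASK32(_szInBytes) \
   ( ~((0x10000u - (_szInBytes)) | ((N_PRIMARY_MAP_32 - 1u) << 16)) )
#define UNALIGNED32(_a,_szInBits)  ((_a) & MASK32((_szInBits) >> 3))

int main ( void )
{
   /* The four mask values computed by hand above. */
   assert(MASK32(1) == 0u);
   assert(MASK32(2) == 1u);
   assert(MASK32(4) == 3u);
   assert(MASK32(8) == 7u);

   /* A 4-byte access at 0x1003 is misaligned; at 0x1004 it is not. */
   assert(UNALIGNED32((uint32_t)0x1003, 32) != 0u);
   assert(UNALIGNED32((uint32_t)0x1004, 32) == 0u);
   return 0;
}
#endif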
4084
4085
4086/* ------------------------ Size = 8 ------------------------ */
4087
4088static INLINE
4089ULong mc_LOADV64 ( Addr a, Bool isBigEndian )
4090{
4091   PROF_EVENT(200, "mc_LOADV64");
4092
4093#ifndef PERF_FAST_LOADV
4094   return mc_LOADVn_slow( a, 64, isBigEndian );
4095#else
4096   {
4097      UWord   sm_off16, vabits16;
4098      SecMap* sm;
4099
4100      if (UNLIKELY( UNALIGNED_OR_HIGH(a,64) )) {
4101         PROF_EVENT(201, "mc_LOADV64-slow1");
4102         return (ULong)mc_LOADVn_slow( a, 64, isBigEndian );
4103      }
4104
4105      sm       = get_secmap_for_reading_low(a);
4106      sm_off16 = SM_OFF_16(a);
4107      vabits16 = ((UShort*)(sm->vabits8))[sm_off16];
4108
4109      // Handle common case quickly: a is suitably aligned, is mapped, and
4110      // addressable.
4111      // Convert V bits from compact memory form to expanded register form.
4112      if (LIKELY(vabits16 == VA_BITS16_DEFINED)) {
4113         return V_BITS64_DEFINED;
4114      } else if (LIKELY(vabits16 == VA_BITS16_UNDEFINED)) {
4115         return V_BITS64_UNDEFINED;
4116      } else {
4117         /* Slow case: the 8 bytes are not all-defined or all-undefined. */
4118         PROF_EVENT(202, "mc_LOADV64-slow2");
4119         return mc_LOADVn_slow( a, 64, isBigEndian );
4120      }
4121   }
4122#endif
4123}
4124
4125VG_REGPARM(1) ULong MC_(helperc_LOADV64be) ( Addr a )
4126{
4127   return mc_LOADV64(a, True);
4128}
4129VG_REGPARM(1) ULong MC_(helperc_LOADV64le) ( Addr a )
4130{
4131   return mc_LOADV64(a, False);
4132}
4133
4134
4135static INLINE
4136void mc_STOREV64 ( Addr a, ULong vbits64, Bool isBigEndian )
4137{
4138   PROF_EVENT(210, "mc_STOREV64");
4139
4140#ifndef PERF_FAST_STOREV
4141   // XXX: this slow case seems to be marginally faster than the fast case!
4142   // Investigate further.
4143   mc_STOREVn_slow( a, 64, vbits64, isBigEndian );
4144#else
4145   {
4146      UWord   sm_off16, vabits16;
4147      SecMap* sm;
4148
4149      if (UNLIKELY( UNALIGNED_OR_HIGH(a,64) )) {
4150         PROF_EVENT(211, "mc_STOREV64-slow1");
4151         mc_STOREVn_slow( a, 64, vbits64, isBigEndian );
4152         return;
4153      }
4154
4155      sm       = get_secmap_for_reading_low(a);
4156      sm_off16 = SM_OFF_16(a);
4157      vabits16 = ((UShort*)(sm->vabits8))[sm_off16];
4158
4159      if (LIKELY( !is_distinguished_sm(sm) &&
4160                          (VA_BITS16_DEFINED   == vabits16 ||
4161                           VA_BITS16_UNDEFINED == vabits16) ))
4162      {
4163         /* Handle common case quickly: a is suitably aligned, */
4164      /* is mapped, and is addressable. */
4165         // Convert full V-bits in register to compact 2-bit form.
4166         if (V_BITS64_DEFINED == vbits64) {
4167            ((UShort*)(sm->vabits8))[sm_off16] = (UShort)VA_BITS16_DEFINED;
4168         } else if (V_BITS64_UNDEFINED == vbits64) {
4169            ((UShort*)(sm->vabits8))[sm_off16] = (UShort)VA_BITS16_UNDEFINED;
4170         } else {
4171            /* Slow but general case -- writing partially defined bytes. */
4172            PROF_EVENT(212, "mc_STOREV64-slow2");
4173            mc_STOREVn_slow( a, 64, vbits64, isBigEndian );
4174         }
4175      } else {
4176         /* Slow but general case. */
4177         PROF_EVENT(213, "mc_STOREV64-slow3");
4178         mc_STOREVn_slow( a, 64, vbits64, isBigEndian );
4179      }
4180   }
4181#endif
4182}
4183
4184VG_REGPARM(1) void MC_(helperc_STOREV64be) ( Addr a, ULong vbits64 )
4185{
4186   mc_STOREV64(a, vbits64, True);
4187}
4188VG_REGPARM(1) void MC_(helperc_STOREV64le) ( Addr a, ULong vbits64 )
4189{
4190   mc_STOREV64(a, vbits64, False);
4191}
4192
4193
4194/* ------------------------ Size = 4 ------------------------ */
4195
4196static INLINE
4197UWord mc_LOADV32 ( Addr a, Bool isBigEndian )
4198{
4199   PROF_EVENT(220, "mc_LOADV32");
4200
4201#ifndef PERF_FAST_LOADV
4202   return (UWord)mc_LOADVn_slow( a, 32, isBigEndian );
4203#else
4204   {
4205      UWord   sm_off, vabits8;
4206      SecMap* sm;
4207
4208      if (UNLIKELY( UNALIGNED_OR_HIGH(a,32) )) {
4209         PROF_EVENT(221, "mc_LOADV32-slow1");
4210         return (UWord)mc_LOADVn_slow( a, 32, isBigEndian );
4211      }
4212
4213      sm      = get_secmap_for_reading_low(a);
4214      sm_off  = SM_OFF(a);
4215      vabits8 = sm->vabits8[sm_off];
4216
4217      // Handle common case quickly: a is suitably aligned, is mapped, and the
4218      // entire word32 it lives in is addressable.
4219      // Convert V bits from compact memory form to expanded register form.
4220      // For 64-bit platforms, set the high 32 bits of retval to 1 (undefined).
4221      // Almost certainly not necessary, but be paranoid.
4222      if (LIKELY(vabits8 == VA_BITS8_DEFINED)) {
4223         return ((UWord)0xFFFFFFFF00000000ULL | (UWord)V_BITS32_DEFINED);
4224      } else if (LIKELY(vabits8 == VA_BITS8_UNDEFINED)) {
4225         return ((UWord)0xFFFFFFFF00000000ULL | (UWord)V_BITS32_UNDEFINED);
4226      } else {
4227         /* Slow case: the 4 bytes are not all-defined or all-undefined. */
4228         PROF_EVENT(222, "mc_LOADV32-slow2");
4229         return (UWord)mc_LOADVn_slow( a, 32, isBigEndian );
4230      }
4231   }
4232#endif
4233}
4234
4235VG_REGPARM(1) UWord MC_(helperc_LOADV32be) ( Addr a )
4236{
4237   return mc_LOADV32(a, True);
4238}
4239VG_REGPARM(1) UWord MC_(helperc_LOADV32le) ( Addr a )
4240{
4241   return mc_LOADV32(a, False);
4242}
4243
4244
4245static INLINE
4246void mc_STOREV32 ( Addr a, UWord vbits32, Bool isBigEndian )
4247{
4248   PROF_EVENT(230, "mc_STOREV32");
4249
4250#ifndef PERF_FAST_STOREV
4251   mc_STOREVn_slow( a, 32, (ULong)vbits32, isBigEndian );
4252#else
4253   {
4254      UWord   sm_off, vabits8;
4255      SecMap* sm;
4256
4257      if (UNLIKELY( UNALIGNED_OR_HIGH(a,32) )) {
4258         PROF_EVENT(231, "mc_STOREV32-slow1");
4259         mc_STOREVn_slow( a, 32, (ULong)vbits32, isBigEndian );
4260         return;
4261      }
4262
4263      sm      = get_secmap_for_reading_low(a);
4264      sm_off  = SM_OFF(a);
4265      vabits8 = sm->vabits8[sm_off];
4266
4267      // Cleverness:  sometimes we don't have to write the shadow memory at
4268      // all, if we can tell that what we want to write is the same as what is
4269      // already there.  The 64/16/8 bit cases also have cleverness at this
4270      // point, but it works a little differently to the code below.
4271      if (V_BITS32_DEFINED == vbits32) {
4272         if (vabits8 == (UInt)VA_BITS8_DEFINED) {
4273            return;
4274         } else if (!is_distinguished_sm(sm) && VA_BITS8_UNDEFINED == vabits8) {
4275            sm->vabits8[sm_off] = (UInt)VA_BITS8_DEFINED;
4276         } else {
4277            // not defined/undefined, or distinguished and changing state
4278            PROF_EVENT(232, "mc_STOREV32-slow2");
4279            mc_STOREVn_slow( a, 32, (ULong)vbits32, isBigEndian );
4280         }
4281      } else if (V_BITS32_UNDEFINED == vbits32) {
4282         if (vabits8 == (UInt)VA_BITS8_UNDEFINED) {
4283            return;
4284         } else if (!is_distinguished_sm(sm) && VA_BITS8_DEFINED == vabits8) {
4285            sm->vabits8[sm_off] = (UInt)VA_BITS8_UNDEFINED;
4286         } else {
4287            // not defined/undefined, or distinguished and changing state
4288            PROF_EVENT(233, "mc_STOREV32-slow3");
4289            mc_STOREVn_slow( a, 32, (ULong)vbits32, isBigEndian );
4290         }
4291      } else {
4292         // Partially defined word
4293         PROF_EVENT(234, "mc_STOREV32-slow4");
4294         mc_STOREVn_slow( a, 32, (ULong)vbits32, isBigEndian );
4295      }
4296   }
4297#endif
4298}
4299
4300VG_REGPARM(2) void MC_(helperc_STOREV32be) ( Addr a, UWord vbits32 )
4301{
4302   mc_STOREV32(a, vbits32, True);
4303}
4304VG_REGPARM(2) void MC_(helperc_STOREV32le) ( Addr a, UWord vbits32 )
4305{
4306   mc_STOREV32(a, vbits32, False);
4307}
4308
4309
4310/* ------------------------ Size = 2 ------------------------ */
4311
4312static INLINE
4313UWord mc_LOADV16 ( Addr a, Bool isBigEndian )
4314{
4315   PROF_EVENT(240, "mc_LOADV16");
4316
4317#ifndef PERF_FAST_LOADV
4318   return (UWord)mc_LOADVn_slow( a, 16, isBigEndian );
4319#else
4320   {
4321      UWord   sm_off, vabits8;
4322      SecMap* sm;
4323
4324      if (UNLIKELY( UNALIGNED_OR_HIGH(a,16) )) {
4325         PROF_EVENT(241, "mc_LOADV16-slow1");
4326         return (UWord)mc_LOADVn_slow( a, 16, isBigEndian );
4327      }
4328
4329      sm      = get_secmap_for_reading_low(a);
4330      sm_off  = SM_OFF(a);
4331      vabits8 = sm->vabits8[sm_off];
4332      // Handle common case quickly: a is suitably aligned, is mapped, and is
4333      // addressable.
4334      // Convert V bits from compact memory form to expanded register form
4335      if      (LIKELY(vabits8 == VA_BITS8_DEFINED  )) { return V_BITS16_DEFINED;   }
4336      else if (LIKELY(vabits8 == VA_BITS8_UNDEFINED)) { return V_BITS16_UNDEFINED; }
4337      else {
4338         // The 4 (yes, 4) bytes are not all-defined or all-undefined, check
4339         // the two sub-bytes.
4340         UChar vabits4 = extract_vabits4_from_vabits8(a, vabits8);
4341         if      (vabits4 == VA_BITS4_DEFINED  ) { return V_BITS16_DEFINED;   }
4342         else if (vabits4 == VA_BITS4_UNDEFINED) { return V_BITS16_UNDEFINED; }
4343         else {
4344            /* Slow case: the two bytes are not all-defined or all-undefined. */
4345            PROF_EVENT(242, "mc_LOADV16-slow2");
4346            return (UWord)mc_LOADVn_slow( a, 16, isBigEndian );
4347         }
4348      }
4349   }
4350#endif
4351}
4352
4353VG_REGPARM(1) UWord MC_(helperc_LOADV16be) ( Addr a )
4354{
4355   return mc_LOADV16(a, True);
4356}
4357VG_REGPARM(1) UWord MC_(helperc_LOADV16le) ( Addr a )
4358{
4359   return mc_LOADV16(a, False);
4360}
4361
4362
4363static INLINE
4364void mc_STOREV16 ( Addr a, UWord vbits16, Bool isBigEndian )
4365{
4366   PROF_EVENT(250, "mc_STOREV16");
4367
4368#ifndef PERF_FAST_STOREV
4369   mc_STOREVn_slow( a, 16, (ULong)vbits16, isBigEndian );
4370#else
4371   {
4372      UWord   sm_off, vabits8;
4373      SecMap* sm;
4374
4375      if (UNLIKELY( UNALIGNED_OR_HIGH(a,16) )) {
4376         PROF_EVENT(251, "mc_STOREV16-slow1");
4377         mc_STOREVn_slow( a, 16, (ULong)vbits16, isBigEndian );
4378         return;
4379      }
4380
4381      sm      = get_secmap_for_reading_low(a);
4382      sm_off  = SM_OFF(a);
4383      vabits8 = sm->vabits8[sm_off];
4384      if (LIKELY( !is_distinguished_sm(sm) &&
4385                          (VA_BITS8_DEFINED   == vabits8 ||
4386                           VA_BITS8_UNDEFINED == vabits8) ))
4387      {
4388         /* Handle common case quickly: a is suitably aligned, */
4389      /* is mapped, and is addressable. */
4390         // Convert full V-bits in register to compact 2-bit form.
4391         if (V_BITS16_DEFINED == vbits16) {
4392            insert_vabits4_into_vabits8( a, VA_BITS4_DEFINED ,
4393                                         &(sm->vabits8[sm_off]) );
4394         } else if (V_BITS16_UNDEFINED == vbits16) {
4395            insert_vabits4_into_vabits8( a, VA_BITS4_UNDEFINED,
4396                                         &(sm->vabits8[sm_off]) );
4397         } else {
4398            /* Slow but general case -- writing partially defined bytes. */
4399            PROF_EVENT(252, "mc_STOREV16-slow2");
4400            mc_STOREVn_slow( a, 16, (ULong)vbits16, isBigEndian );
4401         }
4402      } else {
4403         /* Slow but general case. */
4404         PROF_EVENT(253, "mc_STOREV16-slow3");
4405         mc_STOREVn_slow( a, 16, (ULong)vbits16, isBigEndian );
4406      }
4407   }
4408#endif
4409}
4410
4411VG_REGPARM(2) void MC_(helperc_STOREV16be) ( Addr a, UWord vbits16 )
4412{
4413   mc_STOREV16(a, vbits16, True);
4414}
4415VG_REGPARM(2) void MC_(helperc_STOREV16le) ( Addr a, UWord vbits16 )
4416{
4417   mc_STOREV16(a, vbits16, False);
4418}
4419
4420
4421/* ------------------------ Size = 1 ------------------------ */
4422/* Note: endianness is irrelevant for size == 1 */
4423
4424VG_REGPARM(1)
4425UWord MC_(helperc_LOADV8) ( Addr a )
4426{
4427   PROF_EVENT(260, "mc_LOADV8");
4428
4429#ifndef PERF_FAST_LOADV
4430   return (UWord)mc_LOADVn_slow( a, 8, False/*irrelevant*/ );
4431#else
4432   {
4433      UWord   sm_off, vabits8;
4434      SecMap* sm;
4435
4436      if (UNLIKELY( UNALIGNED_OR_HIGH(a,8) )) {
4437         PROF_EVENT(261, "mc_LOADV8-slow1");
4438         return (UWord)mc_LOADVn_slow( a, 8, False/*irrelevant*/ );
4439      }
4440
4441      sm      = get_secmap_for_reading_low(a);
4442      sm_off  = SM_OFF(a);
4443      vabits8 = sm->vabits8[sm_off];
4444      // Convert V bits from compact memory form to expanded register form
4445      // Handle common case quickly: a is mapped, and the entire
4446      // word32 it lives in is addressable.
4447      if      (LIKELY(vabits8 == VA_BITS8_DEFINED  )) { return V_BITS8_DEFINED;   }
4448      else if (LIKELY(vabits8 == VA_BITS8_UNDEFINED)) { return V_BITS8_UNDEFINED; }
4449      else {
4450         // The 4 (yes, 4) bytes are not all-defined or all-undefined, check
4451         // the single byte.
4452         UChar vabits2 = extract_vabits2_from_vabits8(a, vabits8);
4453         if      (vabits2 == VA_BITS2_DEFINED  ) { return V_BITS8_DEFINED;   }
4454         else if (vabits2 == VA_BITS2_UNDEFINED) { return V_BITS8_UNDEFINED; }
4455         else {
4456            /* Slow case: the byte is not all-defined or all-undefined. */
4457            PROF_EVENT(262, "mc_LOADV8-slow2");
4458            return (UWord)mc_LOADVn_slow( a, 8, False/*irrelevant*/ );
4459         }
4460      }
4461   }
4462#endif
4463}
4464
4465
4466VG_REGPARM(2)
4467void MC_(helperc_STOREV8) ( Addr a, UWord vbits8 )
4468{
4469   PROF_EVENT(270, "mc_STOREV8");
4470
4471#ifndef PERF_FAST_STOREV
4472   mc_STOREVn_slow( a, 8, (ULong)vbits8, False/*irrelevant*/ );
4473#else
4474   {
4475      UWord   sm_off, vabits8;
4476      SecMap* sm;
4477
4478      if (UNLIKELY( UNALIGNED_OR_HIGH(a,8) )) {
4479         PROF_EVENT(271, "mc_STOREV8-slow1");
4480         mc_STOREVn_slow( a, 8, (ULong)vbits8, False/*irrelevant*/ );
4481         return;
4482      }
4483
4484      sm      = get_secmap_for_reading_low(a);
4485      sm_off  = SM_OFF(a);
4486      vabits8 = sm->vabits8[sm_off];
4487      if (LIKELY
4488            ( !is_distinguished_sm(sm) &&
4489              ( (VA_BITS8_DEFINED == vabits8 || VA_BITS8_UNDEFINED == vabits8)
4490             || (VA_BITS2_NOACCESS != extract_vabits2_from_vabits8(a, vabits8))
4491              )
4492            )
4493         )
4494      {
4495         /* Handle common case quickly: a is mapped, the entire word32 it
4496            lives in is addressable. */
4497         // Convert full V-bits in register to compact 2-bit form.
4498         if (V_BITS8_DEFINED == vbits8) {
4499            insert_vabits2_into_vabits8( a, VA_BITS2_DEFINED,
4500                                          &(sm->vabits8[sm_off]) );
4501         } else if (V_BITS8_UNDEFINED == vbits8) {
4502            insert_vabits2_into_vabits8( a, VA_BITS2_UNDEFINED,
4503                                          &(sm->vabits8[sm_off]) );
4504         } else {
4505            /* Slow but general case -- writing partially defined bytes. */
4506            PROF_EVENT(272, "mc_STOREV8-slow2");
4507            mc_STOREVn_slow( a, 8, (ULong)vbits8, False/*irrelevant*/ );
4508         }
4509      } else {
4510         /* Slow but general case. */
4511         PROF_EVENT(273, "mc_STOREV8-slow3");
4512         mc_STOREVn_slow( a, 8, (ULong)vbits8, False/*irrelevant*/ );
4513      }
4514   }
4515#endif
4516}
4517
4518
4519/*------------------------------------------------------------*/
4520/*--- Functions called directly from generated code:       ---*/
4521/*--- Value-check failure handlers.                        ---*/
4522/*------------------------------------------------------------*/
4523
4524/* Call these ones when an origin is available ... */
4525VG_REGPARM(1)
4526void MC_(helperc_value_check0_fail_w_o) ( UWord origin ) {
4527   MC_(record_cond_error) ( VG_(get_running_tid)(), (UInt)origin );
4528}
4529
4530VG_REGPARM(1)
4531void MC_(helperc_value_check1_fail_w_o) ( UWord origin ) {
4532   MC_(record_value_error) ( VG_(get_running_tid)(), 1, (UInt)origin );
4533}
4534
4535VG_REGPARM(1)
4536void MC_(helperc_value_check4_fail_w_o) ( UWord origin ) {
4537   MC_(record_value_error) ( VG_(get_running_tid)(), 4, (UInt)origin );
4538}
4539
4540VG_REGPARM(1)
4541void MC_(helperc_value_check8_fail_w_o) ( UWord origin ) {
4542   MC_(record_value_error) ( VG_(get_running_tid)(), 8, (UInt)origin );
4543}
4544
4545VG_REGPARM(2)
4546void MC_(helperc_value_checkN_fail_w_o) ( HWord sz, UWord origin ) {
4547   MC_(record_value_error) ( VG_(get_running_tid)(), (Int)sz, (UInt)origin );
4548}
4549
4550/* ... and these when an origin isn't available. */
4551
4552VG_REGPARM(0)
4553void MC_(helperc_value_check0_fail_no_o) ( void ) {
4554   MC_(record_cond_error) ( VG_(get_running_tid)(), 0/*origin*/ );
4555}
4556
4557VG_REGPARM(0)
4558void MC_(helperc_value_check1_fail_no_o) ( void ) {
4559   MC_(record_value_error) ( VG_(get_running_tid)(), 1, 0/*origin*/ );
4560}
4561
4562VG_REGPARM(0)
4563void MC_(helperc_value_check4_fail_no_o) ( void ) {
4564   MC_(record_value_error) ( VG_(get_running_tid)(), 4, 0/*origin*/ );
4565}
4566
4567VG_REGPARM(0)
4568void MC_(helperc_value_check8_fail_no_o) ( void ) {
4569   MC_(record_value_error) ( VG_(get_running_tid)(), 8, 0/*origin*/ );
4570}
4571
4572VG_REGPARM(1)
4573void MC_(helperc_value_checkN_fail_no_o) ( HWord sz ) {
4574   MC_(record_value_error) ( VG_(get_running_tid)(), (Int)sz, 0/*origin*/ );
4575}
4576
4577
4578/*------------------------------------------------------------*/
4579/*--- Metadata get/set functions, for client requests.     ---*/
4580/*------------------------------------------------------------*/
4581
4582// Nb: this expands the V+A bits out into register-form V bits, even though
4583// they're in memory.  This is for backward compatibility, and because it's
4584// probably what the user wants.
4585
4586/* Copy Vbits from/to address 'a'. Returns: 1 == OK, 2 == alignment
4587   error [no longer used], 3 == addressing error. */
4588/* Nb: We used to issue various definedness/addressability errors from here,
4589   but we took them out because they ranged from not-very-helpful to
4590   downright annoying, and they complicated the error data structures. */
4591static Int mc_get_or_set_vbits_for_client (
4592   Addr a,
4593   Addr vbits,
4594   SizeT szB,
4595   Bool setting, /* True <=> set vbits,  False <=> get vbits */
4596   Bool is_client_request /* True <=> real user request
4597                             False <=> internal call from gdbserver */
4598)
4599{
4600   SizeT i;
4601   Bool  ok;
4602   UChar vbits8;
4603
4604   /* Check that the arrays are addressable before doing any getting or
4605      setting.  The vbits array is checked only for a real user request. */
4606   for (i = 0; i < szB; i++) {
4607      if (VA_BITS2_NOACCESS == get_vabits2(a + i) ||
4608          (is_client_request && VA_BITS2_NOACCESS == get_vabits2(vbits + i))) {
4609         return 3;
4610      }
4611   }
4612
4613   /* Do the copy */
4614   if (setting) {
4615      /* setting */
4616      for (i = 0; i < szB; i++) {
4617         ok = set_vbits8(a + i, ((UChar*)vbits)[i]);
4618         tl_assert(ok);
4619      }
4620   } else {
4621      /* getting */
4622      for (i = 0; i < szB; i++) {
4623         ok = get_vbits8(a + i, &vbits8);
4624         tl_assert(ok);
4625         ((UChar*)vbits)[i] = vbits8;
4626      }
4627      if (is_client_request)
4628        // The bytes in vbits[] have now been set, so mark them as such.
4629        MC_(make_mem_defined)(vbits, szB);
4630   }
4631
4632   return 1;
4633}
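
/* From the client's side, this function is reached via the
   VALGRIND_GET_VBITS / VALGRIND_SET_VBITS macros in memcheck.h.  A
   minimal illustrative sketch of that usage follows (compiled as an
   ordinary client program, not as part of the tool; the exact header
   path may differ on an installed system). */
#if 0
#include <stdio.h>
#include "memcheck.h"

int main ( void )
{
   unsigned char buf[8];     /* deliberately left uninitialised       */
   unsigned char vbits[8];   /* receives one V-bit byte per data byte */
   int i, res;

   /* Per the return codes above: 1 == OK, 3 == addressing error.
      (0 means the program is not running under Valgrind.) */
   res = VALGRIND_GET_VBITS(buf, vbits, sizeof buf);
   if (res == 1) {
      for (i = 0; i < 8; i++)
         printf("%02x ", vbits[i]);   /* expect 0xff: fully undefined */
      printf("\n");
   }

   /* Writing all-zero V bits back marks buf as fully defined. */
   for (i = 0; i < 8; i++) vbits[i] = 0x00;
   (void) VALGRIND_SET_VBITS(buf, vbits, sizeof buf);
   return 0;
}
#endif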
4634
4635
4636/*------------------------------------------------------------*/
4637/*--- Detecting leaked (unreachable) malloc'd blocks.      ---*/
4638/*------------------------------------------------------------*/
4639
4640/* For the memory leak detector, say whether an entire 64k chunk of
4641   address space is possibly in use, or not.  If in doubt return
4642   True.
4643*/
4644Bool MC_(is_within_valid_secondary) ( Addr a )
4645{
4646   SecMap* sm = maybe_get_secmap_for ( a );
4647   if (sm == NULL || sm == &sm_distinguished[SM_DIST_NOACCESS]) {
4648      /* Definitely not in use. */
4649      return False;
4650   } else {
4651      return True;
4652   }
4653}
4654
4655
4656/* For the memory leak detector, say whether or not a given word
4657   address is to be regarded as valid. */
4658Bool MC_(is_valid_aligned_word) ( Addr a )
4659{
4660   tl_assert(sizeof(UWord) == 4 || sizeof(UWord) == 8);
4661   tl_assert(VG_IS_WORD_ALIGNED(a));
4662   if (get_vabits8_for_aligned_word32 (a) != VA_BITS8_DEFINED)
4663      return False;
4664   if (sizeof(UWord) == 8) {
4665      if (get_vabits8_for_aligned_word32 (a + 4) != VA_BITS8_DEFINED)
4666         return False;
4667   }
4668   if (UNLIKELY(MC_(in_ignored_range)(a)))
4669      return False;
4670   else
4671      return True;
4672}
4673
4674
4675/*------------------------------------------------------------*/
4676/*--- Initialisation                                       ---*/
4677/*------------------------------------------------------------*/
4678
4679static void init_shadow_memory ( void )
4680{
4681   Int     i;
4682   SecMap* sm;
4683
4684   tl_assert(V_BIT_UNDEFINED   == 1);
4685   tl_assert(V_BIT_DEFINED     == 0);
4686   tl_assert(V_BITS8_UNDEFINED == 0xFF);
4687   tl_assert(V_BITS8_DEFINED   == 0);
4688
4689   /* Build the 3 distinguished secondaries */
4690   sm = &sm_distinguished[SM_DIST_NOACCESS];
4691   for (i = 0; i < SM_CHUNKS; i++) sm->vabits8[i] = VA_BITS8_NOACCESS;
4692
4693   sm = &sm_distinguished[SM_DIST_UNDEFINED];
4694   for (i = 0; i < SM_CHUNKS; i++) sm->vabits8[i] = VA_BITS8_UNDEFINED;
4695
4696   sm = &sm_distinguished[SM_DIST_DEFINED];
4697   for (i = 0; i < SM_CHUNKS; i++) sm->vabits8[i] = VA_BITS8_DEFINED;
4698
4699   /* Set up the primary map. */
4700   /* These entries gradually get overwritten as the used address
4701      space expands. */
4702   for (i = 0; i < N_PRIMARY_MAP; i++)
4703      primary_map[i] = &sm_distinguished[SM_DIST_NOACCESS];
4704
4705   /* Auxiliary primary maps */
4706   init_auxmap_L1_L2();
4707
4708   /* No need to set auxmap_size or auxmap_used here; they are
4709      statically initialised. */
4710
4711   /* Secondary V bit table */
4712   secVBitTable = createSecVBitTable();
4713}
4714
4715
4716/*------------------------------------------------------------*/
4717/*--- Sanity check machinery (permanently engaged)         ---*/
4718/*------------------------------------------------------------*/
4719
4720static Bool mc_cheap_sanity_check ( void )
4721{
4722   n_sanity_cheap++;
4723   PROF_EVENT(490, "cheap_sanity_check");
4724   /* Check for sane operating level */
4725   if (MC_(clo_mc_level) < 1 || MC_(clo_mc_level) > 3)
4726      return False;
4727   /* nothing else useful we can rapidly check */
4728   return True;
4729}
4730
4731static Bool mc_expensive_sanity_check ( void )
4732{
4733   Int     i;
4734   Word    n_secmaps_found;
4735   SecMap* sm;
4736   HChar*  errmsg;
4737   Bool    bad = False;
4738
4739   if (0) VG_(printf)("expensive sanity check\n");
4740   if (0) return True;
4741
4742   n_sanity_expensive++;
4743   PROF_EVENT(491, "expensive_sanity_check");
4744
4745   /* Check for sane operating level */
4746   if (MC_(clo_mc_level) < 1 || MC_(clo_mc_level) > 3)
4747      return False;
4748
4749   /* Check that the 3 distinguished SMs are still as they should be. */
4750
4751   /* Check noaccess DSM. */
4752   sm = &sm_distinguished[SM_DIST_NOACCESS];
4753   for (i = 0; i < SM_CHUNKS; i++)
4754      if (sm->vabits8[i] != VA_BITS8_NOACCESS)
4755         bad = True;
4756
4757   /* Check undefined DSM. */
4758   sm = &sm_distinguished[SM_DIST_UNDEFINED];
4759   for (i = 0; i < SM_CHUNKS; i++)
4760      if (sm->vabits8[i] != VA_BITS8_UNDEFINED)
4761         bad = True;
4762
4763   /* Check defined DSM. */
4764   sm = &sm_distinguished[SM_DIST_DEFINED];
4765   for (i = 0; i < SM_CHUNKS; i++)
4766      if (sm->vabits8[i] != VA_BITS8_DEFINED)
4767         bad = True;
4768
4769   if (bad) {
4770      VG_(printf)("memcheck expensive sanity: "
4771                  "distinguished_secondaries have changed\n");
4772      return False;
4773   }
4774
4775   /* If we're not checking for undefined value errors, the secondary V bit
4776    * table should be empty. */
4777   if (MC_(clo_mc_level) == 1) {
4778      if (0 != VG_(OSetGen_Size)(secVBitTable))
4779         return False;
4780   }
4781
4782   /* check the auxiliary maps, very thoroughly */
4783   n_secmaps_found = 0;
4784   errmsg = check_auxmap_L1_L2_sanity( &n_secmaps_found );
4785   if (errmsg) {
4786      VG_(printf)("memcheck expensive sanity, auxmaps:\n\t%s", errmsg);
4787      return False;
4788   }
4789
4790   /* n_secmaps_found is now the number referred to by the auxiliary
4791      primary map.  Now add on the ones referred to by the main
4792      primary map. */
4793   for (i = 0; i < N_PRIMARY_MAP; i++) {
4794      if (primary_map[i] == NULL) {
4795         bad = True;
4796      } else {
4797         if (!is_distinguished_sm(primary_map[i]))
4798            n_secmaps_found++;
4799      }
4800   }
4801
4802   /* check that the number of secmaps issued matches the number that
4803      are reachable (iow, no secmap leaks) */
4804   if (n_secmaps_found != (n_issued_SMs - n_deissued_SMs))
4805      bad = True;
4806
4807   if (bad) {
4808      VG_(printf)("memcheck expensive sanity: "
4809                  "apparent secmap leakage\n");
4810      return False;
4811   }
4812
4819   /* there is only one pointer to each secmap (expensive) */
4820
4821   return True;
4822}
4823
4824/*------------------------------------------------------------*/
4825/*--- Command line args                                    ---*/
4826/*------------------------------------------------------------*/
4827
4828Bool          MC_(clo_partial_loads_ok)       = False;
4829Long          MC_(clo_freelist_vol)           = 20*1000*1000LL;
4830Long          MC_(clo_freelist_big_blocks)    =  1*1000*1000LL;
4831LeakCheckMode MC_(clo_leak_check)             = LC_Summary;
4832VgRes         MC_(clo_leak_resolution)        = Vg_HighRes;
4833Bool          MC_(clo_show_reachable)         = False;
4834Bool          MC_(clo_show_possibly_lost)     = True;
4835Bool          MC_(clo_workaround_gcc296_bugs) = False;
4836Int           MC_(clo_malloc_fill)            = -1;
4837Int           MC_(clo_free_fill)              = -1;
4838Int           MC_(clo_mc_level)               = 2;
4839
4840static Bool mc_process_cmd_line_options(Char* arg)
4841{
4842   Char* tmp_str;
4843
4844   tl_assert( MC_(clo_mc_level) >= 1 && MC_(clo_mc_level) <= 3 );
4845
4846   /* Set MC_(clo_mc_level):
4847         1 = A bit tracking only
4848         2 = A and V bit tracking, but no V bit origins
4849         3 = A and V bit tracking, and V bit origins
4850
4851      Do this by inspecting --undef-value-errors= and
4852      --track-origins=.  Reject the case --undef-value-errors=no
4853      --track-origins=yes as meaningless.
4854   */
4855   if (0 == VG_(strcmp)(arg, "--undef-value-errors=no")) {
4856      if (MC_(clo_mc_level) == 3) {
4857         goto bad_level;
4858      } else {
4859         MC_(clo_mc_level) = 1;
4860         return True;
4861      }
4862   }
4863   if (0 == VG_(strcmp)(arg, "--undef-value-errors=yes")) {
4864      if (MC_(clo_mc_level) == 1)
4865         MC_(clo_mc_level) = 2;
4866      return True;
4867   }
4868   if (0 == VG_(strcmp)(arg, "--track-origins=no")) {
4869      if (MC_(clo_mc_level) == 3)
4870         MC_(clo_mc_level) = 2;
4871      return True;
4872   }
4873   if (0 == VG_(strcmp)(arg, "--track-origins=yes")) {
4874      if (MC_(clo_mc_level) == 1) {
4875         goto bad_level;
4876      } else {
4877         MC_(clo_mc_level) = 3;
4878         return True;
4879      }
4880   }
4881
4882   if VG_BOOL_CLO(arg, "--partial-loads-ok", MC_(clo_partial_loads_ok)) {}
4883   else if VG_BOOL_CLO(arg, "--show-reachable",   MC_(clo_show_reachable))   {}
4884   else if VG_BOOL_CLO(arg, "--show-possibly-lost",
4885                                            MC_(clo_show_possibly_lost))     {}
4886   else if VG_BOOL_CLO(arg, "--workaround-gcc296-bugs",
4887                                            MC_(clo_workaround_gcc296_bugs)) {}
4888
4889   else if VG_BINT_CLO(arg, "--freelist-vol",  MC_(clo_freelist_vol),
4890                                               0, 10*1000*1000*1000LL) {}
4891
4892   else if VG_BINT_CLO(arg, "--freelist-big-blocks",
4893                       MC_(clo_freelist_big_blocks),
4894                       0, 10*1000*1000*1000LL) {}
4895
4896   else if VG_XACT_CLO(arg, "--leak-check=no",
4897                            MC_(clo_leak_check), LC_Off) {}
4898   else if VG_XACT_CLO(arg, "--leak-check=summary",
4899                            MC_(clo_leak_check), LC_Summary) {}
4900   else if VG_XACT_CLO(arg, "--leak-check=yes",
4901                            MC_(clo_leak_check), LC_Full) {}
4902   else if VG_XACT_CLO(arg, "--leak-check=full",
4903                            MC_(clo_leak_check), LC_Full) {}
4904
4905   else if VG_XACT_CLO(arg, "--leak-resolution=low",
4906                            MC_(clo_leak_resolution), Vg_LowRes) {}
4907   else if VG_XACT_CLO(arg, "--leak-resolution=med",
4908                            MC_(clo_leak_resolution), Vg_MedRes) {}
4909   else if VG_XACT_CLO(arg, "--leak-resolution=high",
4910                            MC_(clo_leak_resolution), Vg_HighRes) {}
4911
4912   else if VG_STR_CLO(arg, "--ignore-ranges", tmp_str) {
4913      Int  i;
4914      Bool ok  = parse_ignore_ranges(tmp_str);
4915      if (!ok)
4916        return False;
4917      tl_assert(ignoreRanges.used >= 0);
4918      tl_assert(ignoreRanges.used < M_IGNORE_RANGES);
4919      for (i = 0; i < ignoreRanges.used; i++) {
4920         Addr s = ignoreRanges.start[i];
4921         Addr e = ignoreRanges.end[i];
4922         Addr limit = 0x4000000; /* 64M - entirely arbitrary limit */
4923         if (e <= s) {
4924            VG_(message)(Vg_DebugMsg,
4925               "ERROR: --ignore-ranges: end <= start in range:\n");
4926            VG_(message)(Vg_DebugMsg,
4927               "       0x%lx-0x%lx\n", s, e);
4928            return False;
4929         }
4930         if (e - s > limit) {
4931            VG_(message)(Vg_DebugMsg,
4932               "ERROR: --ignore-ranges: suspiciously large range:\n");
4933            VG_(message)(Vg_DebugMsg,
4934               "       0x%lx-0x%lx (size %ld)\n", s, e, (UWord)(e-s));
4935            return False;
4936         }
4937      }
4938   }
4939
4940   else if VG_BHEX_CLO(arg, "--malloc-fill", MC_(clo_malloc_fill), 0x00,0xFF) {}
4941   else if VG_BHEX_CLO(arg, "--free-fill",   MC_(clo_free_fill),   0x00,0xFF) {}
4942
4943   else
4944      return VG_(replacement_malloc_process_cmd_line_option)(arg);
4945
4946   return True;
4947
4948
4949  bad_level:
4950   VG_(fmsg_bad_option)(arg,
4951      "--track-origins=yes has no effect when --undef-value-errors=no.\n");
4952}
4953
4954static void mc_print_usage(void)
4955{
4956   VG_(printf)(
4957"    --leak-check=no|summary|full     search for memory leaks at exit?  [summary]\n"
4958"    --leak-resolution=low|med|high   differentiation of leak stack traces [high]\n"
4959"    --show-reachable=no|yes          show reachable blocks in leak check? [no]\n"
4960"    --show-possibly-lost=no|yes      show possibly lost blocks in leak check?\n"
4961"                                     [yes]\n"
4962"    --undef-value-errors=no|yes      check for undefined value errors [yes]\n"
4963"    --track-origins=no|yes           show origins of undefined values? [no]\n"
4964"    --partial-loads-ok=no|yes        too hard to explain here; see manual [no]\n"
4965"    --freelist-vol=<number>          volume of freed blocks queue      [20000000]\n"
4966"    --freelist-big-blocks=<number>   releases first blocks with size >= [1000000]\n"
4967"    --workaround-gcc296-bugs=no|yes  self explanatory [no]\n"
4968"    --ignore-ranges=0xPP-0xQQ[,0xRR-0xSS]   assume given addresses are OK\n"
4969"    --malloc-fill=<hexnumber>        fill malloc'd areas with given value\n"
4970"    --free-fill=<hexnumber>          fill free'd areas with given value\n"
4971   );
4972}
4973
4974static void mc_print_debug_usage(void)
4975{
4976   VG_(printf)(
4977"    (none)\n"
4978   );
4979}
4980
4981
4982/*------------------------------------------------------------*/
4983/*--- Client blocks                                        ---*/
4984/*------------------------------------------------------------*/
4985
4986/* Client block management:
4987
4988   This is managed as an expanding array of client block descriptors.
4989   Indices of live descriptors are issued to the client, so it can ask
4990   to free them later.  Therefore we cannot slide live entries down
4991   over dead ones.  Instead we must use free/inuse flags and scan for
4992   an empty slot at allocation time.  This in turn means allocation is
4993   relatively expensive, so we hope this does not happen too often.
4994
4995   An unused block has start == size == 0
4996*/
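
/* A client reaches this machinery through the VALGRIND_CREATE_BLOCK
   and VALGRIND_DISCARD macros in memcheck.h; the value handed back by
   CREATE_BLOCK is the descriptor index allocated below.  A minimal
   illustrative client-side sketch (not part of the tool): */
#if 0
#include <stdlib.h>
#include "memcheck.h"

int main ( void )
{
   char* p = malloc(64);

   /* Associate a description with [p, p+64); Memcheck error reports
      that touch this range can then mention "my buffer". */
   int blk = VALGRIND_CREATE_BLOCK(p, 64, "my buffer");

   /* ... use p ... */

   /* Per VG_USERREQ__DISCARD below: returns 0 if the descriptor was
      found and discarded, 1 otherwise. */
   (void) VALGRIND_DISCARD(blk);
   free(p);
   return 0;
}
#endif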
4997
4998/* type CGenBlock is defined in mc_include.h */
4999
5000/* This subsystem is self-initialising. */
5001static UWord      cgb_size = 0;
5002static UWord      cgb_used = 0;
5003static CGenBlock* cgbs     = NULL;
5004
5005/* Stats for this subsystem. */
5006static ULong cgb_used_MAX = 0;   /* Max in use. */
5007static ULong cgb_allocs   = 0;   /* Number of allocs. */
5008static ULong cgb_discards = 0;   /* Number of discards. */
5009static ULong cgb_search   = 0;   /* Number of searches. */
5010
5011
5012/* Get access to the client block array. */
5013void MC_(get_ClientBlock_array)( /*OUT*/CGenBlock** blocks,
5014                                 /*OUT*/UWord* nBlocks )
5015{
5016   *blocks  = cgbs;
5017   *nBlocks = cgb_used;
5018}
5019
5020
5021static
5022Int alloc_client_block ( void )
5023{
5024   UWord      i, sz_new;
5025   CGenBlock* cgbs_new;
5026
5027   cgb_allocs++;
5028
5029   for (i = 0; i < cgb_used; i++) {
5030      cgb_search++;
5031      if (cgbs[i].start == 0 && cgbs[i].size == 0)
5032         return i;
5033   }
5034
5035   /* Not found.  Try to allocate one at the end. */
5036   if (cgb_used < cgb_size) {
5037      cgb_used++;
5038      return cgb_used-1;
5039   }
5040
5041   /* Ok, we have to allocate a new one. */
5042   tl_assert(cgb_used == cgb_size);
5043   sz_new = (cgbs == NULL) ? 10 : (2 * cgb_size);
5044
5045   cgbs_new = VG_(malloc)( "mc.acb.1", sz_new * sizeof(CGenBlock) );
5046   for (i = 0; i < cgb_used; i++)
5047      cgbs_new[i] = cgbs[i];
5048
5049   if (cgbs != NULL)
5050      VG_(free)( cgbs );
5051   cgbs = cgbs_new;
5052
5053   cgb_size = sz_new;
5054   cgb_used++;
5055   if (cgb_used > cgb_used_MAX)
5056      cgb_used_MAX = cgb_used;
5057   return cgb_used-1;
5058}
5059
5060
5061static void show_client_block_stats ( void )
5062{
5063   VG_(message)(Vg_DebugMsg,
5064      "general CBs: %llu allocs, %llu discards, %llu maxinuse, %llu search\n",
5065      cgb_allocs, cgb_discards, cgb_used_MAX, cgb_search
5066   );
5067}
5068static void print_monitor_help ( void )
5069{
5070   VG_(gdb_printf)
5071      (
5072"\n"
5073"memcheck monitor commands:\n"
5074"  get_vbits <addr> [<len>]\n"
5075"        returns validity bits for <len> (or 1) bytes at <addr>\n"
5076"            bit values 0 = valid, 1 = invalid, __ = unaddressable byte\n"
5077"        Example: get_vbits 0x8049c78 10\n"
5078"  make_memory [noaccess|undefined\n"
5079"                     |defined|Definedifaddressable] <addr> [<len>]\n"
5080"        mark <len> (or 1) bytes at <addr> with the given accessibility\n"
5081"  check_memory [addressable|defined] <addr> [<len>]\n"
5082"        check that <len> (or 1) bytes at <addr> have the given accessibility\n"
5083"            and outputs a description of <addr>\n"
5084"  leak_check [full*|summary] [reachable|possibleleak*|definiteleak]\n"
5085"                [increased*|changed|any]\n"
5086"                [unlimited*|limited <max_loss_records_output>]\n"
5087"            * = defaults\n"
5088"        Examples: leak_check\n"
5089"                  leak_check summary any\n"
5090"                  leak_check full reachable any limited 100\n"
5091"  block_list <loss_record_nr>\n"
5092"        after a leak search, shows the list of blocks of <loss_record_nr>\n"
5093"  who_points_at <addr> [<len>]\n"
5094"        shows places pointing inside <len> (default 1) bytes at <addr>\n"
5095"        (with len 1, only shows \"start pointers\" pointing exactly to <addr>,\n"
5096"         with len > 1, will also show \"interior pointers\")\n"
5097"\n");
5098}
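
/* These commands are typically issued from gdb, once it is connected
   to the Valgrind gdbserver (e.g. via "target remote | vgdb"), by
   prefixing them with "monitor", for example:

      (gdb) monitor get_vbits 0x8049c78 10
      (gdb) monitor leak_check full reachable any

   The handler below parses and dispatches them. */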
5099
5100/* return True if request recognised, False otherwise */
5101static Bool handle_gdb_monitor_command (ThreadId tid, Char *req)
5102{
5103   Char* wcmd;
5104   Char s[VG_(strlen(req)) + 1]; /* copy for strtok_r; +1 for the trailing NUL */
5105   Char *ssaveptr;
5106
5107   VG_(strcpy) (s, req);
5108
5109   wcmd = VG_(strtok_r) (s, " ", &ssaveptr);
5110   /* NB: if possible, avoid introducing a new command below which
5111      starts with the same first letter(s) as an already existing
5112      command. This ensures a shorter abbreviation for the user. */
5113   switch (VG_(keyword_id)
5114           ("help get_vbits leak_check make_memory check_memory "
5115            "block_list who_points_at",
5116            wcmd, kwd_report_duplicated_matches)) {
5117   case -2: /* multiple matches */
5118      return True;
5119   case -1: /* not found */
5120      return False;
5121   case  0: /* help */
5122      print_monitor_help();
5123      return True;
5124   case  1: { /* get_vbits */
5125      Addr address;
5126      SizeT szB = 1;
5127      VG_(strtok_get_address_and_size) (&address, &szB, &ssaveptr);
5128      if (szB != 0) {
5129         UChar vbits;
5130         Int i;
5131         Int unaddressable = 0;
5132         for (i = 0; i < szB; i++) {
5133            Int res = mc_get_or_set_vbits_for_client
5134               (address+i, (Addr) &vbits, 1,
5135                False, /* get them */
5136                False  /* is client request */ );
5137            /* At the start of a new output line: print a newline first. */
5138            if ((i % 32) == 0 && i != 0)
5139               VG_(gdb_printf) ("\n");
5140            /* At the start of the next group of 4 bytes: print a space. */
5141            else if ((i % 4) == 0 && i != 0)
5142               VG_(gdb_printf) (" ");
5143            if (res == 1) {
5144               VG_(gdb_printf) ("%02x", vbits);
5145            } else {
5146               tl_assert(3 == res);
5147               unaddressable++;
5148               VG_(gdb_printf) ("__");
5149            }
5150         }
5151         VG_(gdb_printf) ("\n");
5152         if (unaddressable) {
5153            VG_(gdb_printf)
5154               ("Address %p len %ld has %d bytes unaddressable\n",
5155                (void *)address, szB, unaddressable);
5156         }
5157      }
5158      return True;
5159   }
5160   case  2: { /* leak_check */
5161      Int err = 0;
5162      LeakCheckParams lcp;
5163      Char* kw;
5164
5165      lcp.mode               = LC_Full;
5166      lcp.show_reachable     = False;
5167      lcp.show_possibly_lost = True;
5168      lcp.deltamode          = LCD_Increased;
5169      lcp.max_loss_records_output = 999999999;
5170      lcp.requested_by_monitor_command = True;
5171
5172      for (kw = VG_(strtok_r) (NULL, " ", &ssaveptr);
5173           kw != NULL;
5174           kw = VG_(strtok_r) (NULL, " ", &ssaveptr)) {
5175         switch (VG_(keyword_id)
5176                 ("full summary "
5177                  "reachable possibleleak definiteleak "
5178                  "increased changed any "
5179                  "unlimited limited ",
5180                  kw, kwd_report_all)) {
5181         case -2: err++; break;
5182         case -1: err++; break;
5183         case  0: /* full */
5184            lcp.mode = LC_Full; break;
5185         case  1: /* summary */
5186            lcp.mode = LC_Summary; break;
5187         case  2: /* reachable */
5188            lcp.show_reachable = True;
5189            lcp.show_possibly_lost = True; break;
5190         case  3: /* possibleleak */
5191            lcp.show_reachable = False;
5192            lcp.show_possibly_lost = True; break;
5193         case  4: /* definiteleak */
5194            lcp.show_reachable = False;
5195            lcp.show_possibly_lost = False; break;
5196         case  5: /* increased */
5197            lcp.deltamode = LCD_Increased; break;
5198         case  6: /* changed */
5199            lcp.deltamode = LCD_Changed; break;
5200         case  7: /* any */
5201            lcp.deltamode = LCD_Any; break;
5202         case  8: /* unlimited */
5203            lcp.max_loss_records_output = 999999999; break;
5204         case  9: { /* limited */
5205            int int_value;
5206            char* endptr;
5207
5208            wcmd = VG_(strtok_r) (NULL, " ", &ssaveptr);
5209            if (wcmd == NULL) {
5210               int_value = 0;
5211               endptr = "empty"; /* to report an error below */
5212            } else {
5213               int_value = VG_(strtoll10) (wcmd, (Char **)&endptr);
5214            }
5215            if (*endptr != '\0')
5216               VG_(gdb_printf) ("missing or malformed integer value\n");
5217            else if (int_value > 0)
5218               lcp.max_loss_records_output = (UInt) int_value;
5219            else
5220               VG_(gdb_printf) ("max_loss_records_output must be >= 1, got %d\n",
5221                                int_value);
5222            break;
5223         }
5224         default:
5225            tl_assert (0);
5226         }
5227      }
5228      if (!err)
5229         MC_(detect_memory_leaks)(tid, &lcp);
5230      return True;
5231   }
5232
5233   case  3: { /* make_memory */
5234      Addr address;
5235      SizeT szB = 1;
5236      int kwdid = VG_(keyword_id)
5237         ("noaccess undefined defined Definedifaddressable",
5238          VG_(strtok_r) (NULL, " ", &ssaveptr), kwd_report_all);
5239      VG_(strtok_get_address_and_size) (&address, &szB, &ssaveptr);
5240      if (address == (Addr) 0 && szB == 0) return True;
5241      switch (kwdid) {
5242      case -2: break;
5243      case -1: break;
5244      case  0: MC_(make_mem_noaccess) (address, szB); break;
5245      case  1: make_mem_undefined_w_tid_and_okind ( address, szB, tid,
5246                                                    MC_OKIND_USER ); break;
5247      case  2: MC_(make_mem_defined) ( address, szB ); break;
5248      case  3: make_mem_defined_if_addressable ( address, szB ); break;
5249      default: tl_assert(0);
5250      }
5251      return True;
5252   }
5253
5254   case  4: { /* check_memory */
5255      Addr address;
5256      SizeT szB = 1;
5257      Addr bad_addr;
5258      UInt okind;
5259      char* src;
5260      UInt otag;
5261      UInt ecu;
5262      ExeContext* origin_ec;
5263      MC_ReadResult res;
5264
5265      int kwdid = VG_(keyword_id)
5266         ("addressable defined",
5267          VG_(strtok_r) (NULL, " ", &ssaveptr), kwd_report_all);
5268      VG_(strtok_get_address_and_size) (&address, &szB, &ssaveptr);
5269      if (address == (Addr) 0 && szB == 0) return True;
5270      switch (kwdid) {
5271      case -2: break;
5272      case -1: break;
5273      case  0:
5274         if (is_mem_addressable ( address, szB, &bad_addr ))
5275            VG_(gdb_printf) ("Address %p len %ld addressable\n",
5276                             (void *)address, szB);
5277         else
5278            VG_(gdb_printf)
5279               ("Address %p len %ld not addressable:\nbad address %p\n",
5280                (void *)address, szB, (void *) bad_addr);
5281         MC_(pp_describe_addr) (address);
5282         break;
5283      case  1: res = is_mem_defined ( address, szB, &bad_addr, &otag );
5284         if (MC_AddrErr == res)
5285            VG_(gdb_printf)
5286               ("Address %p len %ld not addressable:\nbad address %p\n",
5287                (void *)address, szB, (void *) bad_addr);
5288         else if (MC_ValueErr == res) {
5289            okind = otag & 3;
5290            switch (okind) {
5291            case MC_OKIND_STACK:
5292               src = " was created by a stack allocation"; break;
5293            case MC_OKIND_HEAP:
5294               src = " was created by a heap allocation"; break;
5295            case MC_OKIND_USER:
5296               src = " was created by a client request"; break;
5297            case MC_OKIND_UNKNOWN:
5298               src = ""; break;
5299            default: tl_assert(0);
5300            }
5301            VG_(gdb_printf)
5302               ("Address %p len %ld not defined:\n"
5303                "Uninitialised value at %p%s\n",
5304                (void *)address, szB, (void *) bad_addr, src);
5305            ecu = otag & ~3;
5306            if (VG_(is_plausible_ECU)(ecu)) {
5307               origin_ec = VG_(get_ExeContext_from_ECU)( ecu );
5308               VG_(pp_ExeContext)( origin_ec );
5309            }
5310         }
5311         else
5312            VG_(gdb_printf) ("Address %p len %ld defined\n",
5313                             (void *)address, szB);
5314         MC_(pp_describe_addr) (address);
5315         break;
5316      default: tl_assert(0);
5317      }
5318      return True;
5319   }
5320
5321   case  5: { /* block_list */
5322      Char* wl;
5323      Char *endptr;
5324      UInt lr_nr = 0;
5325      wl = VG_(strtok_r) (NULL, " ", &ssaveptr);
5326      lr_nr = VG_(strtoull10) (wl, &endptr);
5327      if (wl != NULL && *endptr != '\0') {
5328         VG_(gdb_printf) ("malformed integer\n");
5329      } else {
5330         // Use lr_nr-1: the number shown to the user is one more than the index into lr_array.
5331         if (lr_nr == 0 || ! MC_(print_block_list) (lr_nr-1))
5332            VG_(gdb_printf) ("invalid loss record nr\n");
5333      }
5334      return True;
5335   }
5336
5337   case  6: { /* who_points_at */
5338      Addr address;
5339      SizeT szB = 1;
5340
5341      VG_(strtok_get_address_and_size) (&address, &szB, &ssaveptr);
5342      if (address == (Addr) 0) {
5343         VG_(gdb_printf) ("Cannot search who points at 0x0\n");
5344         return True;
5345      }
5346      MC_(who_points_at) (address, szB);
5347      return True;
5348   }
5349
5350   default:
5351      tl_assert(0);
5352      return False;
5353   }
5354}
5355
5356/*------------------------------------------------------------*/
5357/*--- Client requests                                      ---*/
5358/*------------------------------------------------------------*/
5359
5360static Bool mc_handle_client_request ( ThreadId tid, UWord* arg, UWord* ret )
5361{
5362   Int   i;
5363   Bool  ok;
5364   Addr  bad_addr;
5365
5366   if (!VG_IS_TOOL_USERREQ('M','C',arg[0])
5367       && VG_USERREQ__MALLOCLIKE_BLOCK != arg[0]
5368       && VG_USERREQ__RESIZEINPLACE_BLOCK != arg[0]
5369       && VG_USERREQ__FREELIKE_BLOCK   != arg[0]
5370       && VG_USERREQ__CREATE_MEMPOOL   != arg[0]
5371       && VG_USERREQ__DESTROY_MEMPOOL  != arg[0]
5372       && VG_USERREQ__MEMPOOL_ALLOC    != arg[0]
5373       && VG_USERREQ__MEMPOOL_FREE     != arg[0]
5374       && VG_USERREQ__MEMPOOL_TRIM     != arg[0]
5375       && VG_USERREQ__MOVE_MEMPOOL     != arg[0]
5376       && VG_USERREQ__MEMPOOL_CHANGE   != arg[0]
5377       && VG_USERREQ__MEMPOOL_EXISTS   != arg[0]
5378       && VG_USERREQ__GDB_MONITOR_COMMAND   != arg[0])
5379      return False;
5380
5381   switch (arg[0]) {
5382      case VG_USERREQ__CHECK_MEM_IS_ADDRESSABLE:
5383         ok = is_mem_addressable ( arg[1], arg[2], &bad_addr );
5384         if (!ok)
5385            MC_(record_user_error) ( tid, bad_addr, /*isAddrErr*/True, 0 );
5386         *ret = ok ? (UWord)NULL : bad_addr;
5387         break;
5388
5389      case VG_USERREQ__CHECK_MEM_IS_DEFINED: {
5390         Bool errorV    = False;
5391         Addr bad_addrV = 0;
5392         UInt otagV     = 0;
5393         Bool errorA    = False;
5394         Addr bad_addrA = 0;
5395         is_mem_defined_comprehensive(
5396            arg[1], arg[2],
5397            &errorV, &bad_addrV, &otagV, &errorA, &bad_addrA
5398         );
5399         if (errorV) {
5400            MC_(record_user_error) ( tid, bad_addrV,
5401                                     /*isAddrErr*/False, otagV );
5402         }
5403         if (errorA) {
5404            MC_(record_user_error) ( tid, bad_addrA,
5405                                     /*isAddrErr*/True, 0 );
5406         }
5407         /* Return the lower of the two erring addresses, if any. */
5408         *ret = 0;
5409         if (errorV && !errorA) {
5410            *ret = bad_addrV;
5411         }
5412         if (!errorV && errorA) {
5413            *ret = bad_addrA;
5414         }
5415         if (errorV && errorA) {
5416            *ret = bad_addrV < bad_addrA ? bad_addrV : bad_addrA;
5417         }
5418         break;
5419      }
5420
5421      case VG_USERREQ__DO_LEAK_CHECK: {
5422         LeakCheckParams lcp;
5423
5424         if (arg[1] == 0)
5425            lcp.mode = LC_Full;
5426         else if (arg[1] == 1)
5427            lcp.mode = LC_Summary;
5428         else {
5429            VG_(message)(Vg_UserMsg,
5430                         "Warning: unknown memcheck leak search mode\n");
5431            lcp.mode = LC_Full;
5432         }
5433
5434         lcp.show_reachable = MC_(clo_show_reachable);
5435         lcp.show_possibly_lost = MC_(clo_show_possibly_lost);
5436
5437         if (arg[2] == 0)
5438            lcp.deltamode = LCD_Any;
5439         else if (arg[2] == 1)
5440            lcp.deltamode = LCD_Increased;
5441         else if (arg[2] == 2)
5442            lcp.deltamode = LCD_Changed;
5443         else {
5444            VG_(message)
5445               (Vg_UserMsg,
5446                "Warning: unknown memcheck leak search deltamode\n");
5447            lcp.deltamode = LCD_Any;
5448         }
5449         lcp.max_loss_records_output = 999999999;
5450         lcp.requested_by_monitor_command = False;
5451
5452         MC_(detect_memory_leaks)(tid, &lcp);
5453         *ret = 0; /* return value is meaningless */
5454         break;
5455      }
5456
5457      case VG_USERREQ__MAKE_MEM_NOACCESS:
5458         MC_(make_mem_noaccess) ( arg[1], arg[2] );
5459         *ret = -1;
5460         break;
5461
5462      case VG_USERREQ__MAKE_MEM_UNDEFINED:
5463         make_mem_undefined_w_tid_and_okind ( arg[1], arg[2], tid,
5464                                              MC_OKIND_USER );
5465         *ret = -1;
5466         break;
5467
5468      case VG_USERREQ__MAKE_MEM_DEFINED:
5469         MC_(make_mem_defined) ( arg[1], arg[2] );
5470         *ret = -1;
5471         break;
5472
5473      case VG_USERREQ__MAKE_MEM_DEFINED_IF_ADDRESSABLE:
5474         make_mem_defined_if_addressable ( arg[1], arg[2] );
5475         *ret = -1;
5476         break;
5477
5478      case VG_USERREQ__CREATE_BLOCK: /* describe a block */
5479         if (arg[1] != 0 && arg[2] != 0) {
5480            i = alloc_client_block();
5481            /* VG_(printf)("allocated %d %p\n", i, cgbs); */
5482            cgbs[i].start = arg[1];
5483            cgbs[i].size  = arg[2];
5484            cgbs[i].desc  = VG_(strdup)("mc.mhcr.1", (Char *)arg[3]);
5485            cgbs[i].where = VG_(record_ExeContext) ( tid, 0/*first_ip_delta*/ );
5486            *ret = i;
5487         } else
5488            *ret = -1;
5489         break;
5490
5491      case VG_USERREQ__DISCARD: /* discard */
5492         if (cgbs == NULL
5493             || arg[2] >= cgb_used ||
5494             (cgbs[arg[2]].start == 0 && cgbs[arg[2]].size == 0)) {
5495            *ret = 1;
5496         } else {
5497            tl_assert(arg[2] >= 0 && arg[2] < cgb_used);
5498            cgbs[arg[2]].start = cgbs[arg[2]].size = 0;
5499            VG_(free)(cgbs[arg[2]].desc);
5500            cgb_discards++;
5501            *ret = 0;
5502         }
5503         break;
5504
5505      case VG_USERREQ__GET_VBITS:
5506         *ret = mc_get_or_set_vbits_for_client
5507                   ( arg[1], arg[2], arg[3],
5508                     False /* get them */,
5509                     True /* is client request */ );
5510         break;
5511
5512      case VG_USERREQ__SET_VBITS:
5513         *ret = mc_get_or_set_vbits_for_client
5514                   ( arg[1], arg[2], arg[3],
5515                     True /* set them */,
5516                     True /* is client request */ );
5517         break;
5518
5519      case VG_USERREQ__COUNT_LEAKS: { /* count leaked bytes */
5520         UWord** argp = (UWord**)arg;
5521         // MC_(bytes_leaked) et al were set by the last leak check (or zero
5522         // if no prior leak checks performed).
5523         *argp[1] = MC_(bytes_leaked) + MC_(bytes_indirect);
5524         *argp[2] = MC_(bytes_dubious);
5525         *argp[3] = MC_(bytes_reachable);
5526         *argp[4] = MC_(bytes_suppressed);
5527         // there is no argp[5]
5528         //*argp[5] = MC_(bytes_indirect);
5529         // XXX need to make *argp[1-4] defined;  currently done in the
5530         // VALGRIND_COUNT_LEAKS_MACRO by initialising them to zero.
5531         *ret = 0;
5532         return True;
5533      }
5534      case VG_USERREQ__COUNT_LEAK_BLOCKS: { /* count leaked blocks */
5535         UWord** argp = (UWord**)arg;
5536         // MC_(blocks_leaked) et al were set by the last leak check (or zero
5537         // if no prior leak checks performed).
5538         *argp[1] = MC_(blocks_leaked) + MC_(blocks_indirect);
5539         *argp[2] = MC_(blocks_dubious);
5540         *argp[3] = MC_(blocks_reachable);
5541         *argp[4] = MC_(blocks_suppressed);
5542         // there is no argp[5]
5543         //*argp[5] = MC_(blocks_indirect);
5544         // XXX need to make *argp[1-4] defined;  currently done in the
5545         // VALGRIND_COUNT_LEAK_BLOCKS macro by initialising them to zero.
5546         *ret = 0;
5547         return True;
5548      }
5549      case VG_USERREQ__MALLOCLIKE_BLOCK: {
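         /* Client-side sketch (memcheck.h) for a custom allocator; the
            allocator function name is made up:
               void* p = my_pool_alloc(sz);
               VALGRIND_MALLOCLIKE_BLOCK(p, sz, /*rzB*/0, /*is_zeroed*/0);
               ...
               VALGRIND_FREELIKE_BLOCK(p, /*rzB*/0);
            A nonzero rzB additionally marks rzB bytes on each side of
            the block as noaccess (the red zones), as done below. */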
5550         Addr p         = (Addr)arg[1];
5551         SizeT sizeB    =       arg[2];
5552         UInt rzB       =       arg[3];
5553         Bool is_zeroed = (Bool)arg[4];
5554
5555         MC_(new_block) ( tid, p, sizeB, /*ignored*/0, is_zeroed,
5556                          MC_AllocCustom, MC_(malloc_list) );
5557         if (rzB > 0) {
5558            MC_(make_mem_noaccess) ( p - rzB, rzB);
5559            MC_(make_mem_noaccess) ( p + sizeB, rzB);
5560         }
5561         return True;
5562      }
5563      case VG_USERREQ__RESIZEINPLACE_BLOCK: {
5564         Addr p         = (Addr)arg[1];
5565         SizeT oldSizeB =       arg[2];
5566         SizeT newSizeB =       arg[3];
5567         UInt rzB       =       arg[4];
5568
5569         MC_(handle_resizeInPlace) ( tid, p, oldSizeB, newSizeB, rzB );
5570         return True;
5571      }
5572      case VG_USERREQ__FREELIKE_BLOCK: {
5573         Addr p         = (Addr)arg[1];
5574         UInt rzB       =       arg[2];
5575
5576         MC_(handle_free) ( tid, p, rzB, MC_AllocCustom );
5577         return True;
5578      }
5579
5580      case _VG_USERREQ__MEMCHECK_RECORD_OVERLAP_ERROR: {
5581         Char* s   = (Char*)arg[1];
5582         Addr  dst = (Addr) arg[2];
5583         Addr  src = (Addr) arg[3];
5584         SizeT len = (SizeT)arg[4];
5585         MC_(record_overlap_error)(tid, s, src, dst, len);
5586         return True;
5587      }
5588
5589      case VG_USERREQ__CREATE_MEMPOOL: {
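         /* Client-side sketch (memcheck.h) of the mempool protocol;
            'pool' is simply an address the client uses to identify the
            pool:
               VALGRIND_CREATE_MEMPOOL(pool, /*rzB*/0, /*is_zeroed*/0);
               VALGRIND_MEMPOOL_ALLOC(pool, addr, size);
               VALGRIND_MEMPOOL_FREE(pool, addr);
               VALGRIND_DESTROY_MEMPOOL(pool);
            The TRIM, MOVE, CHANGE and EXISTS requests handled below
            have matching macros as well. */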
5590         Addr pool      = (Addr)arg[1];
5591         UInt rzB       =       arg[2];
5592         Bool is_zeroed = (Bool)arg[3];
5593
5594         MC_(create_mempool) ( pool, rzB, is_zeroed );
5595         return True;
5596      }
5597
5598      case VG_USERREQ__DESTROY_MEMPOOL: {
5599         Addr pool      = (Addr)arg[1];
5600
5601         MC_(destroy_mempool) ( pool );
5602         return True;
5603      }
5604
5605      case VG_USERREQ__MEMPOOL_ALLOC: {
5606         Addr pool      = (Addr)arg[1];
5607         Addr addr      = (Addr)arg[2];
5608         UInt size      =       arg[3];
5609
5610         MC_(mempool_alloc) ( tid, pool, addr, size );
5611         return True;
5612      }
5613
5614      case VG_USERREQ__MEMPOOL_FREE: {
5615         Addr pool      = (Addr)arg[1];
5616         Addr addr      = (Addr)arg[2];
5617
5618         MC_(mempool_free) ( pool, addr );
5619         return True;
5620      }
5621
5622      case VG_USERREQ__MEMPOOL_TRIM: {
5623         Addr pool      = (Addr)arg[1];
5624         Addr addr      = (Addr)arg[2];
5625         UInt size      =       arg[3];
5626
5627         MC_(mempool_trim) ( pool, addr, size );
5628         return True;
5629      }
5630
5631      case VG_USERREQ__MOVE_MEMPOOL: {
5632         Addr poolA     = (Addr)arg[1];
5633         Addr poolB     = (Addr)arg[2];
5634
5635         MC_(move_mempool) ( poolA, poolB );
5636         return True;
5637      }
5638
5639      case VG_USERREQ__MEMPOOL_CHANGE: {
5640         Addr pool      = (Addr)arg[1];
5641         Addr addrA     = (Addr)arg[2];
5642         Addr addrB     = (Addr)arg[3];
5643         UInt size      =       arg[4];
5644
5645         MC_(mempool_change) ( pool, addrA, addrB, size );
5646         return True;
5647      }
5648
5649      case VG_USERREQ__MEMPOOL_EXISTS: {
5650         Addr pool      = (Addr)arg[1];
5651
5652         *ret = (UWord) MC_(mempool_exists) ( pool );
5653         return True;
5654      }
5655
5656      case VG_USERREQ__GDB_MONITOR_COMMAND: {
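         /* Handles "monitor" commands sent by a GDB client attached via
            Valgrind's gdbserver, e.g. (from the gdb prompt)
               monitor leak_check full reachable any
            arg[1] points at the command string; if the command is not
            recognised, *ret is set to 0 and False is returned. */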
5657         Bool handled = handle_gdb_monitor_command (tid, (Char*)arg[1]);
5658         if (handled)
5659            *ret = 1;
5660         else
5661            *ret = 0;
5662         return handled;
5663      }
5664
5665      default:
5666         VG_(message)(
5667            Vg_UserMsg,
5668            "Warning: unknown memcheck client request code %llx\n",
5669            (ULong)arg[0]
5670         );
5671         return False;
5672   }
5673   return True;
5674}
5675
5676
5677/*------------------------------------------------------------*/
5678/*--- Crude profiling machinery.                           ---*/
5679/*------------------------------------------------------------*/
5680
5681// We track a number of interesting events (using PROF_EVENT)
5682// if MC_PROFILE_MEMORY is defined.
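//
// Instrumented code paths bump a counter with calls of roughly the
// following form (the event number and name here are illustrative):
//
//    PROF_EVENT(200, "mc_LOADV64le");
//
// done_prof_mem() below then prints every counter that is nonzero.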
5683
5684#ifdef MC_PROFILE_MEMORY
5685
5686UInt   MC_(event_ctr)[N_PROF_EVENTS];
5687HChar* MC_(event_ctr_name)[N_PROF_EVENTS];
5688
5689static void init_prof_mem ( void )
5690{
5691   Int i;
5692   for (i = 0; i < N_PROF_EVENTS; i++) {
5693      MC_(event_ctr)[i] = 0;
5694      MC_(event_ctr_name)[i] = NULL;
5695   }
5696}
5697
5698static void done_prof_mem ( void )
5699{
5700   Int  i;
5701   Bool spaced = False;
5702   for (i = 0; i < N_PROF_EVENTS; i++) {
5703      if (!spaced && (i % 10) == 0) {
5704         VG_(printf)("\n");
5705         spaced = True;
5706      }
5707      if (MC_(event_ctr)[i] > 0) {
5708         spaced = False;
5709         VG_(printf)( "prof mem event %3d: %9d   %s\n",
5710                      i, MC_(event_ctr)[i],
5711                      MC_(event_ctr_name)[i]
5712                         ? MC_(event_ctr_name)[i] : "unnamed");
5713      }
5714   }
5715}
5716
5717#else
5718
5719static void init_prof_mem ( void ) { }
5720static void done_prof_mem ( void ) { }
5721
5722#endif
5723
5724
5725/*------------------------------------------------------------*/
5726/*--- Origin tracking stuff                                ---*/
5727/*------------------------------------------------------------*/
5728
5729/*--------------------------------------------*/
5730/*--- Origin tracking: load handlers       ---*/
5731/*--------------------------------------------*/
5732
5733static INLINE UInt merge_origins ( UInt or1, UInt or2 ) {
5734   return or1 > or2 ? or1 : or2;
5735}
5736
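/* A note on the representation assumed by the handlers below: for each
   32-bit word line->w32[i] in an OCacheLine, line->descr[i] holds 4
   bits, one per byte of that word.  A set bit means that byte's origin
   is the tag stored in line->w32[i]; a clear bit means no origin is
   recorded for that byte.  For example, descr[i] == 0x3 means only the
   two lowest-addressed bytes of the word carry the tag in w32[i].
   When the parts of a value disagree, merge_origins keeps the
   numerically larger tag; taking the maximum at least guarantees that
   a real (nonzero) tag wins over "no origin" (zero). */
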
5737UWord VG_REGPARM(1) MC_(helperc_b_load1)( Addr a ) {
5738   OCacheLine* line;
5739   UChar descr;
5740   UWord lineoff = oc_line_offset(a);
5741   UWord byteoff = a & 3; /* 0, 1, 2 or 3 */
5742
5743   if (OC_ENABLE_ASSERTIONS) {
5744      tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE);
5745   }
5746
5747   line = find_OCacheLine( a );
5748
5749   descr = line->descr[lineoff];
5750   if (OC_ENABLE_ASSERTIONS) {
5751      tl_assert(descr < 0x10);
5752   }
5753
5754   if (LIKELY(0 == (descr & (1 << byteoff))))  {
5755      return 0;
5756   } else {
5757      return line->w32[lineoff];
5758   }
5759}
5760
5761UWord VG_REGPARM(1) MC_(helperc_b_load2)( Addr a ) {
5762   OCacheLine* line;
5763   UChar descr;
5764   UWord lineoff, byteoff;
5765
5766   if (UNLIKELY(a & 1)) {
5767      /* Handle misaligned case, slowly. */
5768      UInt oLo   = (UInt)MC_(helperc_b_load1)( a + 0 );
5769      UInt oHi   = (UInt)MC_(helperc_b_load1)( a + 1 );
5770      return merge_origins(oLo, oHi);
5771   }
5772
5773   lineoff = oc_line_offset(a);
5774   byteoff = a & 3; /* 0 or 2 */
5775
5776   if (OC_ENABLE_ASSERTIONS) {
5777      tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE);
5778   }
5779   line = find_OCacheLine( a );
5780
5781   descr = line->descr[lineoff];
5782   if (OC_ENABLE_ASSERTIONS) {
5783      tl_assert(descr < 0x10);
5784   }
5785
5786   if (LIKELY(0 == (descr & (3 << byteoff)))) {
5787      return 0;
5788   } else {
5789      return line->w32[lineoff];
5790   }
5791}
5792
5793UWord VG_REGPARM(1) MC_(helperc_b_load4)( Addr a ) {
5794   OCacheLine* line;
5795   UChar descr;
5796   UWord lineoff;
5797
5798   if (UNLIKELY(a & 3)) {
5799      /* Handle misaligned case, slowly. */
5800      UInt oLo   = (UInt)MC_(helperc_b_load2)( a + 0 );
5801      UInt oHi   = (UInt)MC_(helperc_b_load2)( a + 2 );
5802      return merge_origins(oLo, oHi);
5803   }
5804
5805   lineoff = oc_line_offset(a);
5806   if (OC_ENABLE_ASSERTIONS) {
5807      tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE);
5808   }
5809
5810   line = find_OCacheLine( a );
5811
5812   descr = line->descr[lineoff];
5813   if (OC_ENABLE_ASSERTIONS) {
5814      tl_assert(descr < 0x10);
5815   }
5816
5817   if (LIKELY(0 == descr)) {
5818      return 0;
5819   } else {
5820      return line->w32[lineoff];
5821   }
5822}
5823
5824UWord VG_REGPARM(1) MC_(helperc_b_load8)( Addr a ) {
5825   OCacheLine* line;
5826   UChar descrLo, descrHi, descr;
5827   UWord lineoff;
5828
5829   if (UNLIKELY(a & 7)) {
5830      /* Handle misaligned case, slowly. */
5831      UInt oLo   = (UInt)MC_(helperc_b_load4)( a + 0 );
5832      UInt oHi   = (UInt)MC_(helperc_b_load4)( a + 4 );
5833      return merge_origins(oLo, oHi);
5834   }
5835
5836   lineoff = oc_line_offset(a);
5837   if (OC_ENABLE_ASSERTIONS) {
5838      tl_assert(lineoff == (lineoff & 6)); /*0,2,4,6*//*since 8-aligned*/
5839   }
5840
5841   line = find_OCacheLine( a );
5842
5843   descrLo = line->descr[lineoff + 0];
5844   descrHi = line->descr[lineoff + 1];
5845   descr   = descrLo | descrHi;
5846   if (OC_ENABLE_ASSERTIONS) {
5847      tl_assert(descr < 0x10);
5848   }
5849
5850   if (LIKELY(0 == descr)) {
5851      return 0; /* both 32-bit chunks are defined */
5852   } else {
5853      UInt oLo = descrLo == 0 ? 0 : line->w32[lineoff + 0];
5854      UInt oHi = descrHi == 0 ? 0 : line->w32[lineoff + 1];
5855      return merge_origins(oLo, oHi);
5856   }
5857}
5858
5859UWord VG_REGPARM(1) MC_(helperc_b_load16)( Addr a ) {
5860   UInt oLo   = (UInt)MC_(helperc_b_load8)( a + 0 );
5861   UInt oHi   = (UInt)MC_(helperc_b_load8)( a + 8 );
5862   UInt oBoth = merge_origins(oLo, oHi);
5863   return (UWord)oBoth;
5864}
5865
5866UWord VG_REGPARM(1) MC_(helperc_b_load32)( Addr a ) {
5867   UInt oQ0   = (UInt)MC_(helperc_b_load8)( a + 0 );
5868   UInt oQ1   = (UInt)MC_(helperc_b_load8)( a + 8 );
5869   UInt oQ2   = (UInt)MC_(helperc_b_load8)( a + 16 );
5870   UInt oQ3   = (UInt)MC_(helperc_b_load8)( a + 24 );
5871   UInt oAll  = merge_origins(merge_origins(oQ0, oQ1),
5872                              merge_origins(oQ2, oQ3));
5873   return (UWord)oAll;
5874}
5875
5876
5877/*--------------------------------------------*/
5878/*--- Origin tracking: store handlers      ---*/
5879/*--------------------------------------------*/
5880
5881void VG_REGPARM(2) MC_(helperc_b_store1)( Addr a, UWord d32 ) {
5882   OCacheLine* line;
5883   UWord lineoff = oc_line_offset(a);
5884   UWord byteoff = a & 3; /* 0, 1, 2 or 3 */
5885
5886   if (OC_ENABLE_ASSERTIONS) {
5887      tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE);
5888   }
5889
5890   line = find_OCacheLine( a );
5891
5892   if (d32 == 0) {
5893      line->descr[lineoff] &= ~(1 << byteoff);
5894   } else {
5895      line->descr[lineoff] |= (1 << byteoff);
5896      line->w32[lineoff] = d32;
5897   }
5898}
5899
5900void VG_REGPARM(2) MC_(helperc_b_store2)( Addr a, UWord d32 ) {
5901   OCacheLine* line;
5902   UWord lineoff, byteoff;
5903
5904   if (UNLIKELY(a & 1)) {
5905      /* Handle misaligned case, slowly. */
5906      MC_(helperc_b_store1)( a + 0, d32 );
5907      MC_(helperc_b_store1)( a + 1, d32 );
5908      return;
5909   }
5910
5911   lineoff = oc_line_offset(a);
5912   byteoff = a & 3; /* 0 or 2 */
5913
5914   if (OC_ENABLE_ASSERTIONS) {
5915      tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE);
5916   }
5917
5918   line = find_OCacheLine( a );
5919
5920   if (d32 == 0) {
5921      line->descr[lineoff] &= ~(3 << byteoff);
5922   } else {
5923      line->descr[lineoff] |= (3 << byteoff);
5924      line->w32[lineoff] = d32;
5925   }
5926}
5927
5928void VG_REGPARM(2) MC_(helperc_b_store4)( Addr a, UWord d32 ) {
5929   OCacheLine* line;
5930   UWord lineoff;
5931
5932   if (UNLIKELY(a & 3)) {
5933      /* Handle misaligned case, slowly. */
5934      MC_(helperc_b_store2)( a + 0, d32 );
5935      MC_(helperc_b_store2)( a + 2, d32 );
5936      return;
5937   }
5938
5939   lineoff = oc_line_offset(a);
5940   if (OC_ENABLE_ASSERTIONS) {
5941      tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE);
5942   }
5943
5944   line = find_OCacheLine( a );
5945
5946   if (d32 == 0) {
5947      line->descr[lineoff] = 0;
5948   } else {
5949      line->descr[lineoff] = 0xF;
5950      line->w32[lineoff] = d32;
5951   }
5952}
5953
5954void VG_REGPARM(2) MC_(helperc_b_store8)( Addr a, UWord d32 ) {
5955   OCacheLine* line;
5956   UWord lineoff;
5957
5958   if (UNLIKELY(a & 7)) {
5959      /* Handle misaligned case, slowly. */
5960      MC_(helperc_b_store4)( a + 0, d32 );
5961      MC_(helperc_b_store4)( a + 4, d32 );
5962      return;
5963   }
5964
5965   lineoff = oc_line_offset(a);
5966   if (OC_ENABLE_ASSERTIONS) {
5967      tl_assert(lineoff == (lineoff & 6)); /*0,2,4,6*//*since 8-aligned*/
5968   }
5969
5970   line = find_OCacheLine( a );
5971
5972   if (d32 == 0) {
5973      line->descr[lineoff + 0] = 0;
5974      line->descr[lineoff + 1] = 0;
5975   } else {
5976      line->descr[lineoff + 0] = 0xF;
5977      line->descr[lineoff + 1] = 0xF;
5978      line->w32[lineoff + 0] = d32;
5979      line->w32[lineoff + 1] = d32;
5980   }
5981}
5982
5983void VG_REGPARM(2) MC_(helperc_b_store16)( Addr a, UWord d32 ) {
5984   MC_(helperc_b_store8)( a + 0, d32 );
5985   MC_(helperc_b_store8)( a + 8, d32 );
5986}
5987
5988void VG_REGPARM(2) MC_(helperc_b_store32)( Addr a, UWord d32 ) {
5989   MC_(helperc_b_store8)( a +  0, d32 );
5990   MC_(helperc_b_store8)( a +  8, d32 );
5991   MC_(helperc_b_store8)( a + 16, d32 );
5992   MC_(helperc_b_store8)( a + 24, d32 );
5993}
5994
5995
5996/*--------------------------------------------*/
5997/*--- Origin tracking: sarp handlers       ---*/
5998/*--------------------------------------------*/
5999
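/* Both handlers below decompose the range the same way: peel off a
   leading 1-byte and/or 2-byte piece until the address is 4-aligned,
   do the bulk with 4-byte stores, then mop up with a trailing 2-byte
   and/or 1-byte piece.  For example, a = 0x1003, len = 10 becomes
   store1(0x1003), store4(0x1004), store4(0x1008), store1(0x100C). */
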
6000__attribute__((noinline))
6001static void ocache_sarp_Set_Origins ( Addr a, UWord len, UInt otag ) {
6002   if ((a & 1) && len >= 1) {
6003      MC_(helperc_b_store1)( a, otag );
6004      a++;
6005      len--;
6006   }
6007   if ((a & 2) && len >= 2) {
6008      MC_(helperc_b_store2)( a, otag );
6009      a += 2;
6010      len -= 2;
6011   }
6012   if (len >= 4)
6013      tl_assert(0 == (a & 3));
6014   while (len >= 4) {
6015      MC_(helperc_b_store4)( a, otag );
6016      a += 4;
6017      len -= 4;
6018   }
6019   if (len >= 2) {
6020      MC_(helperc_b_store2)( a, otag );
6021      a += 2;
6022      len -= 2;
6023   }
6024   if (len >= 1) {
6025      MC_(helperc_b_store1)( a, otag );
6026      //a++;
6027      len--;
6028   }
6029   tl_assert(len == 0);
6030}
6031
6032__attribute__((noinline))
6033static void ocache_sarp_Clear_Origins ( Addr a, UWord len ) {
6034   if ((a & 1) && len >= 1) {
6035      MC_(helperc_b_store1)( a, 0 );
6036      a++;
6037      len--;
6038   }
6039   if ((a & 2) && len >= 2) {
6040      MC_(helperc_b_store2)( a, 0 );
6041      a += 2;
6042      len -= 2;
6043   }
6044   if (len >= 4)
6045      tl_assert(0 == (a & 3));
6046   while (len >= 4) {
6047      MC_(helperc_b_store4)( a, 0 );
6048      a += 4;
6049      len -= 4;
6050   }
6051   if (len >= 2) {
6052      MC_(helperc_b_store2)( a, 0 );
6053      a += 2;
6054      len -= 2;
6055   }
6056   if (len >= 1) {
6057      MC_(helperc_b_store1)( a, 0 );
6058      //a++;
6059      len--;
6060   }
6061   tl_assert(len == 0);
6062}
6063
6064
6065/*------------------------------------------------------------*/
6066/*--- Setup and finalisation                               ---*/
6067/*------------------------------------------------------------*/
6068
6069static void mc_post_clo_init ( void )
6070{
6071   /* If we've been asked to emit XML, mash around various other
6072      options so as to constrain the output somewhat. */
6073   if (VG_(clo_xml)) {
6074      /* Extract as much info as possible from the leak checker. */
6075      /* MC_(clo_show_reachable) = True; */
6076      MC_(clo_leak_check) = LC_Full;
6077   }
6078
6079   if (MC_(clo_freelist_big_blocks) >= MC_(clo_freelist_vol))
6080      VG_(message)(Vg_UserMsg,
6081                   "Warning: --freelist-big-blocks value %lld has no effect\n"
6082                   "as it is >= the --freelist-vol value %lld\n",
6083                   MC_(clo_freelist_big_blocks),
6084                   MC_(clo_freelist_vol));
6085
6086   tl_assert( MC_(clo_mc_level) >= 1 && MC_(clo_mc_level) <= 3 );
6087
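   /* Reminder: level 1 tracks addressability (A bits) only
      (--undef-value-errors=no), level 2 adds definedness (V bits), and
      level 3 adds origin tracking (--track-origins=yes). */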
6088   if (MC_(clo_mc_level) == 3) {
6089      /* We're doing origin tracking. */
6090#     ifdef PERF_FAST_STACK
6091      VG_(track_new_mem_stack_4_w_ECU)   ( mc_new_mem_stack_4_w_ECU   );
6092      VG_(track_new_mem_stack_8_w_ECU)   ( mc_new_mem_stack_8_w_ECU   );
6093      VG_(track_new_mem_stack_12_w_ECU)  ( mc_new_mem_stack_12_w_ECU  );
6094      VG_(track_new_mem_stack_16_w_ECU)  ( mc_new_mem_stack_16_w_ECU  );
6095      VG_(track_new_mem_stack_32_w_ECU)  ( mc_new_mem_stack_32_w_ECU  );
6096      VG_(track_new_mem_stack_112_w_ECU) ( mc_new_mem_stack_112_w_ECU );
6097      VG_(track_new_mem_stack_128_w_ECU) ( mc_new_mem_stack_128_w_ECU );
6098      VG_(track_new_mem_stack_144_w_ECU) ( mc_new_mem_stack_144_w_ECU );
6099      VG_(track_new_mem_stack_160_w_ECU) ( mc_new_mem_stack_160_w_ECU );
6100#     endif
6101      VG_(track_new_mem_stack_w_ECU)     ( mc_new_mem_stack_w_ECU     );
6102   } else {
6103      /* Not doing origin tracking */
6104#     ifdef PERF_FAST_STACK
6105      VG_(track_new_mem_stack_4)   ( mc_new_mem_stack_4   );
6106      VG_(track_new_mem_stack_8)   ( mc_new_mem_stack_8   );
6107      VG_(track_new_mem_stack_12)  ( mc_new_mem_stack_12  );
6108      VG_(track_new_mem_stack_16)  ( mc_new_mem_stack_16  );
6109      VG_(track_new_mem_stack_32)  ( mc_new_mem_stack_32  );
6110      VG_(track_new_mem_stack_112) ( mc_new_mem_stack_112 );
6111      VG_(track_new_mem_stack_128) ( mc_new_mem_stack_128 );
6112      VG_(track_new_mem_stack_144) ( mc_new_mem_stack_144 );
6113      VG_(track_new_mem_stack_160) ( mc_new_mem_stack_160 );
6114#     endif
6115      VG_(track_new_mem_stack)     ( mc_new_mem_stack     );
6116   }
6117
6118   /* This origin tracking cache is huge (~100M), so only initialise it
6119      if we need it. */
6120   if (MC_(clo_mc_level) >= 3) {
6121      init_OCache();
6122      tl_assert(ocacheL1 != NULL);
6123      tl_assert(ocacheL2 != NULL);
6124   } else {
6125      tl_assert(ocacheL1 == NULL);
6126      tl_assert(ocacheL2 == NULL);
6127   }
6128
6129   /* Do not check definedness of guest state if --undef-value-errors=no */
6130   if (MC_(clo_mc_level) >= 2)
6131      VG_(track_pre_reg_read) ( mc_pre_reg_read );
6132}
6133
6134static void print_SM_info(char* type, int n_SMs)
6135{
6136   VG_(message)(Vg_DebugMsg,
6137      " memcheck: SMs: %s = %d (%ldk, %ldM)\n",
6138      type,
6139      n_SMs,
6140      n_SMs * sizeof(SecMap) / 1024UL,
6141      n_SMs * sizeof(SecMap) / (1024 * 1024UL) );
6142}
6143
6144static void mc_fini ( Int exitcode )
6145{
6146   MC_(print_malloc_stats)();
6147
6148   if (MC_(clo_leak_check) != LC_Off) {
6149      LeakCheckParams lcp;
6150      lcp.mode = MC_(clo_leak_check);
6151      lcp.show_reachable = MC_(clo_show_reachable);
6152      lcp.show_possibly_lost = MC_(clo_show_possibly_lost);
6153      lcp.deltamode = LCD_Any;
6154      lcp.max_loss_records_output = 999999999;
6155      lcp.requested_by_monitor_command = False;
6156      MC_(detect_memory_leaks)(1/*bogus ThreadId*/, &lcp);
6157   } else {
6158      if (VG_(clo_verbosity) == 1 && !VG_(clo_xml)) {
6159         VG_(umsg)(
6160            "For a detailed leak analysis, rerun with: --leak-check=full\n"
6161            "\n"
6162         );
6163      }
6164   }
6165
6166   if (VG_(clo_verbosity) == 1 && !VG_(clo_xml)) {
6167      VG_(message)(Vg_UserMsg,
6168                   "For counts of detected and suppressed errors, rerun with: -v\n");
6169   }
6170
6171   if (MC_(any_value_errors) && !VG_(clo_xml) && VG_(clo_verbosity) >= 1
6172       && MC_(clo_mc_level) == 2) {
6173      VG_(message)(Vg_UserMsg,
6174                   "Use --track-origins=yes to see where "
6175                   "uninitialised values come from\n");
6176   }
6177
6178   done_prof_mem();
6179
6180   if (VG_(clo_stats)) {
6181      SizeT max_secVBit_szB, max_SMs_szB, max_shmem_szB;
6182
6183      VG_(message)(Vg_DebugMsg,
6184         " memcheck: sanity checks: %d cheap, %d expensive\n",
6185         n_sanity_cheap, n_sanity_expensive );
6186      VG_(message)(Vg_DebugMsg,
6187         " memcheck: auxmaps: %lld auxmap entries (%lldk, %lldM) in use\n",
6188         n_auxmap_L2_nodes,
6189         n_auxmap_L2_nodes * 64,
6190         n_auxmap_L2_nodes / 16 );
6191      VG_(message)(Vg_DebugMsg,
6192         " memcheck: auxmaps_L1: %lld searches, %lld cmps, ratio %lld:10\n",
6193         n_auxmap_L1_searches, n_auxmap_L1_cmps,
6194         (10ULL * n_auxmap_L1_cmps)
6195            / (n_auxmap_L1_searches ? n_auxmap_L1_searches : 1)
6196      );
6197      VG_(message)(Vg_DebugMsg,
6198         " memcheck: auxmaps_L2: %lld searches, %lld nodes\n",
6199         n_auxmap_L2_searches, n_auxmap_L2_nodes
6200      );
6201
6202      print_SM_info("n_issued     ", n_issued_SMs);
6203      print_SM_info("n_deissued   ", n_deissued_SMs);
6204      print_SM_info("max_noaccess ", max_noaccess_SMs);
6205      print_SM_info("max_undefined", max_undefined_SMs);
6206      print_SM_info("max_defined  ", max_defined_SMs);
6207      print_SM_info("max_non_DSM  ", max_non_DSM_SMs);
6208
6209      // Three DSMs, plus the non-DSM ones
6210      max_SMs_szB = (3 + max_non_DSM_SMs) * sizeof(SecMap);
6211      // The 3*sizeof(Word) term is the AVL node metadata size.
6212      // The VG_ROUNDUP is because the OSet pool allocator will/must align
6213      // the elements on pointer size.
6214      // Note that the pool allocator has some additional small overhead
6215      // which is not counted in the below.
6216      // Hardwiring this logic sucks, but I don't see how else to do it.
6217      max_secVBit_szB = max_secVBit_nodes *
6218            (3*sizeof(Word) + VG_ROUNDUP(sizeof(SecVBitNode), sizeof(void*)));
6219      max_shmem_szB   = sizeof(primary_map) + max_SMs_szB + max_secVBit_szB;
6220
6221      VG_(message)(Vg_DebugMsg,
6222         " memcheck: max sec V bit nodes:    %d (%ldk, %ldM)\n",
6223         max_secVBit_nodes, max_secVBit_szB / 1024,
6224                            max_secVBit_szB / (1024 * 1024));
6225      VG_(message)(Vg_DebugMsg,
6226         " memcheck: set_sec_vbits8 calls: %llu (new: %llu, updates: %llu)\n",
6227         sec_vbits_new_nodes + sec_vbits_updates,
6228         sec_vbits_new_nodes, sec_vbits_updates );
6229      VG_(message)(Vg_DebugMsg,
6230         " memcheck: max shadow mem size:   %ldk, %ldM\n",
6231         max_shmem_szB / 1024, max_shmem_szB / (1024 * 1024));
6232
6233      if (MC_(clo_mc_level) >= 3) {
6234         VG_(message)(Vg_DebugMsg,
6235                      " ocacheL1: %'12lu refs   %'12lu misses (%'lu lossage)\n",
6236                      stats_ocacheL1_find,
6237                      stats_ocacheL1_misses,
6238                      stats_ocacheL1_lossage );
6239         VG_(message)(Vg_DebugMsg,
6240                      " ocacheL1: %'12lu at 0   %'12lu at 1\n",
6241                      stats_ocacheL1_find - stats_ocacheL1_misses
6242                         - stats_ocacheL1_found_at_1
6243                         - stats_ocacheL1_found_at_N,
6244                      stats_ocacheL1_found_at_1 );
6245         VG_(message)(Vg_DebugMsg,
6246                      " ocacheL1: %'12lu at 2+  %'12lu move-fwds\n",
6247                      stats_ocacheL1_found_at_N,
6248                      stats_ocacheL1_movefwds );
6249         VG_(message)(Vg_DebugMsg,
6250                      " ocacheL1: %'12lu sizeB  %'12u useful\n",
6251                      (UWord)sizeof(OCache),
6252                      4 * OC_W32S_PER_LINE * OC_LINES_PER_SET * OC_N_SETS );
6253         VG_(message)(Vg_DebugMsg,
6254                      " ocacheL2: %'12lu refs   %'12lu misses\n",
6255                      stats__ocacheL2_refs,
6256                      stats__ocacheL2_misses );
6257         VG_(message)(Vg_DebugMsg,
6258                      " ocacheL2:    %'9lu max nodes %'9lu curr nodes\n",
6259                      stats__ocacheL2_n_nodes_max,
6260                      stats__ocacheL2_n_nodes );
6261         VG_(message)(Vg_DebugMsg,
6262                      " niacache: %'12lu refs   %'12lu misses\n",
6263                      stats__nia_cache_queries, stats__nia_cache_misses);
6264      } else {
6265         tl_assert(ocacheL1 == NULL);
6266         tl_assert(ocacheL2 == NULL);
6267      }
6268   }
6269
6270   if (0) {
6271      VG_(message)(Vg_DebugMsg,
6272        "------ Valgrind's client block stats follow ---------------\n" );
6273      show_client_block_stats();
6274   }
6275}
6276
6277/* Mark the given addr/len unaddressable for the watchpoint implementation.
6278   The PointKind will be handled at access time. */
6279static Bool mc_mark_unaddressable_for_watchpoint (PointKind kind, Bool insert,
6280                                                  Addr addr, SizeT len)
6281{
6282   /* GDBTD this is somewhat fishy. We should probably save the previous
6283      accessibility and definedness in gdbserver so that they can be restored
6284      properly. Currently, we assume that the user only watches things
6285      which are properly addressable and defined. */
6286   if (insert)
6287      MC_(make_mem_noaccess) (addr, len);
6288   else
6289      MC_(make_mem_defined)  (addr, len);
6290   return True;
6291}
6292
6293static void mc_pre_clo_init(void)
6294{
6295   VG_(details_name)            ("Memcheck");
6296   VG_(details_version)         (NULL);
6297   VG_(details_description)     ("a memory error detector");
6298   VG_(details_copyright_author)(
6299      "Copyright (C) 2002-2012, and GNU GPL'd, by Julian Seward et al.");
6300   VG_(details_bug_reports_to)  (VG_BUGS_TO);
6301   VG_(details_avg_translation_sizeB) ( 640 );
6302
6303   VG_(basic_tool_funcs)          (mc_post_clo_init,
6304                                   MC_(instrument),
6305                                   mc_fini);
6306
6307   VG_(needs_final_IR_tidy_pass)  ( MC_(final_tidy) );
6308
6309
6310   VG_(needs_core_errors)         ();
6311   VG_(needs_tool_errors)         (MC_(eq_Error),
6312                                   MC_(before_pp_Error),
6313                                   MC_(pp_Error),
6314                                   True,/*show TIDs for errors*/
6315                                   MC_(update_Error_extra),
6316                                   MC_(is_recognised_suppression),
6317                                   MC_(read_extra_suppression_info),
6318                                   MC_(error_matches_suppression),
6319                                   MC_(get_error_name),
6320                                   MC_(get_extra_suppression_info));
6321   VG_(needs_libc_freeres)        ();
6322   VG_(needs_command_line_options)(mc_process_cmd_line_options,
6323                                   mc_print_usage,
6324                                   mc_print_debug_usage);
6325   VG_(needs_client_requests)     (mc_handle_client_request);
6326   VG_(needs_sanity_checks)       (mc_cheap_sanity_check,
6327                                   mc_expensive_sanity_check);
6328   VG_(needs_malloc_replacement)  (MC_(malloc),
6329                                   MC_(__builtin_new),
6330                                   MC_(__builtin_vec_new),
6331                                   MC_(memalign),
6332                                   MC_(calloc),
6333                                   MC_(free),
6334                                   MC_(__builtin_delete),
6335                                   MC_(__builtin_vec_delete),
6336                                   MC_(realloc),
6337                                   MC_(malloc_usable_size),
6338                                   MC_MALLOC_DEFAULT_REDZONE_SZB );
6339   MC_(Malloc_Redzone_SzB) = VG_(malloc_effective_client_redzone_size)();
6340
6341   VG_(needs_xml_output)          ();
6342
6343   VG_(track_new_mem_startup)     ( mc_new_mem_startup );
6344   VG_(track_new_mem_stack_signal)( make_mem_undefined_w_tid );
6345   // We assume that brk()/sbrk() does not initialise new memory.  Is this
6346   // accurate?  John Reiser says:
6347   //
6348   //   0) sbrk() can *decrease* process address space.  No zero fill is done
6349   //   for a decrease, not even the fragment on the high end of the last page
6350   //   that is beyond the new highest address.  For maximum safety and
6351   //   portability, then the bytes in the last page that reside above [the
6352   //   new] sbrk(0) should be considered to be uninitialized, but in practice
6353   //   it is exceedingly likely that they will retain their previous
6354   //   contents.
6355   //
6356   //   1) If an increase is large enough to require new whole pages, then
6357   //   those new whole pages (like all new pages) are zero-filled by the
6358   //   operating system.  So if sbrk(0) already is page aligned, then
6359   //   sbrk(PAGE_SIZE) *does* zero-fill the new memory.
6360   //
6361   //   2) Any increase that lies within an existing allocated page is not
6362   //   changed.  So if (x = sbrk(0)) is not page aligned, then
6363   //   sbrk(PAGE_SIZE) yields ((PAGE_SIZE -1) & -x) bytes which keep their
6364   //   existing contents, and an additional PAGE_SIZE bytes which are zeroed.
6365   //   ((PAGE_SIZE -1) & x) of them are "covered" by the sbrk(), and the rest
6366   //   of them come along for the ride because the operating system deals
6367   //   only in whole pages.  Again, for maximum safety and portability, then
6368   //   anything that lives above [the new] sbrk(0) should be considered
6369   //   uninitialized, but in practice will retain previous contents [zero in
6370   //   this case.]"
6371   //
6372   // In short:
6373   //
6374   //   A key property of sbrk/brk is that new whole pages that are supplied
6375   //   by the operating system *do* get initialized to zero.
6376   //
6377   // As for the portability of all this:
6378   //
6379   //   sbrk and brk are not POSIX.  However, any system that is a derivative
6380   //   of *nix has sbrk and brk because too much software (such as the
6381   //   Bourne shell) relies on the traditional memory map (.text,
6382   //   .data+.bss, stack) and the existence of sbrk/brk.
6383   //
6384   // So we should arguably observe all this.  However:
6385   // - The current inaccuracy has caused maybe one complaint in seven years(?)
6386   // - Relying on the zeroed-ness of whole brk'd pages is pretty grotty... I
6387   //   doubt most programmers know the above information.
6388   // So I'm not terribly unhappy with marking it as undefined. --njn.
6389   //
6390   // [More:  I think most of what John said only applies to sbrk().  It seems
6391   // that brk() always deals in whole pages.  And since this event deals
6392   // directly with brk(), not with sbrk(), perhaps it would be reasonable to
6393   // just mark all memory it allocates as defined.]
6394   //
6395   VG_(track_new_mem_brk)         ( make_mem_undefined_w_tid );
6396
6397   // Handling of mmap and mprotect isn't simple (well, it is simple,
6398   // but the justification isn't.)  See comments above, just prior to
6399   // mc_new_mem_mmap.
6400   VG_(track_new_mem_mmap)        ( mc_new_mem_mmap );
6401   VG_(track_change_mem_mprotect) ( mc_new_mem_mprotect );
6402
6403   VG_(track_copy_mem_remap)      ( MC_(copy_address_range_state) );
6404
6405   VG_(track_die_mem_stack_signal)( MC_(make_mem_noaccess) );
6406   VG_(track_die_mem_brk)         ( MC_(make_mem_noaccess) );
6407   VG_(track_die_mem_munmap)      ( MC_(make_mem_noaccess) );
6408
6409   /* Defer the specification of the new_mem_stack functions to the
6410      post_clo_init function, since we need to first parse the command
6411      line before deciding which set to use. */
6412
6413#  ifdef PERF_FAST_STACK
6414   VG_(track_die_mem_stack_4)     ( mc_die_mem_stack_4   );
6415   VG_(track_die_mem_stack_8)     ( mc_die_mem_stack_8   );
6416   VG_(track_die_mem_stack_12)    ( mc_die_mem_stack_12  );
6417   VG_(track_die_mem_stack_16)    ( mc_die_mem_stack_16  );
6418   VG_(track_die_mem_stack_32)    ( mc_die_mem_stack_32  );
6419   VG_(track_die_mem_stack_112)   ( mc_die_mem_stack_112 );
6420   VG_(track_die_mem_stack_128)   ( mc_die_mem_stack_128 );
6421   VG_(track_die_mem_stack_144)   ( mc_die_mem_stack_144 );
6422   VG_(track_die_mem_stack_160)   ( mc_die_mem_stack_160 );
6423#  endif
6424   VG_(track_die_mem_stack)       ( mc_die_mem_stack     );
6425
6426   VG_(track_ban_mem_stack)       ( MC_(make_mem_noaccess) );
6427
6428   VG_(track_pre_mem_read)        ( check_mem_is_defined );
6429   VG_(track_pre_mem_read_asciiz) ( check_mem_is_defined_asciiz );
6430   VG_(track_pre_mem_write)       ( check_mem_is_addressable );
6431   VG_(track_post_mem_write)      ( mc_post_mem_write );
6432
6433   VG_(track_post_reg_write)                  ( mc_post_reg_write );
6434   VG_(track_post_reg_write_clientcall_return)( mc_post_reg_write_clientcall );
6435
6436   VG_(needs_watchpoint)          ( mc_mark_unaddressable_for_watchpoint );
6437
6438   init_shadow_memory();
6439   MC_(chunk_poolalloc) = VG_(newPA) (sizeof(MC_Chunk),
6440                                      1000,
6441                                      VG_(malloc),
6442                                      "mc.cMC.1 (MC_Chunk pools)",
6443                                      VG_(free));
6444   MC_(malloc_list)  = VG_(HT_construct)( "MC_(malloc_list)" );
6445   MC_(mempool_list) = VG_(HT_construct)( "MC_(mempool_list)" );
6446   init_prof_mem();
6447
6448   tl_assert( mc_expensive_sanity_check() );
6449
6450   // {LOADV,STOREV}[8421] will all fail horribly if this isn't true.
6451   tl_assert(sizeof(UWord) == sizeof(Addr));
6452   // Call me paranoid.  I don't care.
6453   tl_assert(sizeof(void*) == sizeof(Addr));
6454
6455   // BYTES_PER_SEC_VBIT_NODE must be a power of two.
6456   tl_assert(-1 != VG_(log2)(BYTES_PER_SEC_VBIT_NODE));
6457
6458   /* This is small.  Always initialise it. */
6459   init_nia_to_ecu_cache();
6460
6461   /* We can't initialise ocacheL1/ocacheL2 yet, since we don't know
6462      if we need to, since the command line args haven't been
6463      processed yet.  Hence defer it to mc_post_clo_init. */
6464   tl_assert(ocacheL1 == NULL);
6465   tl_assert(ocacheL2 == NULL);
6466
6467   /* Check some important stuff.  See extensive comments above
6468      re UNALIGNED_OR_HIGH for background. */
6469#  if VG_WORDSIZE == 4
6470   tl_assert(sizeof(void*) == 4);
6471   tl_assert(sizeof(Addr)  == 4);
6472   tl_assert(sizeof(UWord) == 4);
6473   tl_assert(sizeof(Word)  == 4);
6474   tl_assert(MAX_PRIMARY_ADDRESS == 0xFFFFFFFFUL);
6475   tl_assert(MASK(1) == 0UL);
6476   tl_assert(MASK(2) == 1UL);
6477   tl_assert(MASK(4) == 3UL);
6478   tl_assert(MASK(8) == 7UL);
6479#  else
6480   tl_assert(VG_WORDSIZE == 8);
6481   tl_assert(sizeof(void*) == 8);
6482   tl_assert(sizeof(Addr)  == 8);
6483   tl_assert(sizeof(UWord) == 8);
6484   tl_assert(sizeof(Word)  == 8);
6485   tl_assert(MAX_PRIMARY_ADDRESS == 0x7FFFFFFFFULL);
6486   tl_assert(MASK(1) == 0xFFFFFFF800000000ULL);
6487   tl_assert(MASK(2) == 0xFFFFFFF800000001ULL);
6488   tl_assert(MASK(4) == 0xFFFFFFF800000003ULL);
6489   tl_assert(MASK(8) == 0xFFFFFFF800000007ULL);
6490#  endif
6491}
6492
6493VG_DETERMINE_INTERFACE_VERSION(mc_pre_clo_init)
6494
6495/*--------------------------------------------------------------------*/
6496/*--- end                                                mc_main.c ---*/
6497/*--------------------------------------------------------------------*/
6498