/*--------------------------------------------------------------------*/
/*--- LibHB: a library for implementing and checking               ---*/
/*--- the happens-before relationship in concurrent programs.      ---*/
/*---                                                 libhb_main.c ---*/
/*--------------------------------------------------------------------*/

/*
   This file is part of LibHB, a library for implementing and checking
   the happens-before relationship in concurrent programs.

   Copyright (C) 2008-2017 OpenWorks Ltd
      info@open-works.co.uk

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
   02111-1307, USA.

   The GNU General Public License is contained in the file COPYING.
*/

33f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj#include "pub_tool_basics.h"
346643e96a72e8530a7c8830c02ffb2fb4aee74c88philippe#include "pub_tool_poolalloc.h"
35f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj#include "pub_tool_libcassert.h"
36f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj#include "pub_tool_libcbase.h"
37f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj#include "pub_tool_libcprint.h"
38f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj#include "pub_tool_mallocfree.h"
39f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj#include "pub_tool_wordfm.h"
40328d6627c26471332610da3f5a0b9cc3cdd410c7philippe#include "pub_tool_hashtable.h"
41f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj#include "pub_tool_xarray.h"
42f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj#include "pub_tool_oset.h"
43f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj#include "pub_tool_threadstate.h"
44f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj#include "pub_tool_aspacemgr.h"
45328d6627c26471332610da3f5a0b9cc3cdd410c7philippe#include "pub_tool_stacktrace.h"
46f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj#include "pub_tool_execontext.h"
47f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj#include "pub_tool_errormgr.h"
485e2ac3b459ed6b1c8e6978b2ef18d73e1ad9cf14sewardj#include "pub_tool_options.h"        // VG_(clo_stats)
49f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj#include "hg_basics.h"
50f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj#include "hg_wordset.h"
51f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj#include "hg_lock_n_thread.h"
52f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj#include "hg_errors.h"
53f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
54f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj#include "libhb.h"
55f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
56f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////
//                                                             //
// Debugging #defines                                          //
//                                                             //
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////

/* CHECK_MSM: compile-time switch for checking the sanity of shadow
   values in the core memory state machine.  It is 0 (disabled) as
   written; change the "#if 0" below to "#if 1" and rebuild to set it
   to 1. */
#if 0
#  define CHECK_MSM 1
#else
#  define CHECK_MSM 0
#endif


/* CHECK_CEM: compile-time switch for checking sanity (reference
   counts, etc) in the conflicting access machinery.  It is 0
   (disabled) as written; change the "#if 0" below to "#if 1" and
   rebuild to set it to 1. */
#if 0
#  define CHECK_CEM 1
#else
#  define CHECK_CEM 0
#endif


/* CHECK_ZSM: compile-time switch for checking sanity in the compressed
   shadow memory machinery, particularly in its caching innards.
   Unfortunately there's no almost-zero-cost way to make these checks
   selectable at run time.  Hence set the "#if 0" below to "#if 1" and
   rebuild if you want them.

   Note that the enabled branch also redefines 'inline' as
   __attribute__((noinline)) for everything that follows it. */
#if 0
#  define CHECK_ZSM 1  /* do sanity-check CacheLine stuff */
#  define inline __attribute__((noinline))
   /* probably want to ditch -fomit-frame-pointer too */
#else
#  define CHECK_ZSM 0   /* don't sanity-check CacheLine stuff */
#endif


968f5374ee1084af8cb46ec405a1de38c5aeb9194bsewardj/////////////////////////////////////////////////////////////////
978f5374ee1084af8cb46ec405a1de38c5aeb9194bsewardj/////////////////////////////////////////////////////////////////
98f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj//                                                             //
99ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj// data decls: VtsID                                           //
100f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj//                                                             //
101f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj/////////////////////////////////////////////////////////////////
102f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj/////////////////////////////////////////////////////////////////
103f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
104ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj/* VtsIDs: Unique small-integer IDs for VTSs.  VtsIDs can't exceed 30
105ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj   bits, since they have to be packed into the lowest 30 bits of an
106ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj   SVal. */
107ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardjtypedef  UInt  VtsID;
108ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj#define VtsID_INVALID 0xFFFFFFFF
1098f5374ee1084af8cb46ec405a1de38c5aeb9194bsewardj
1108f5374ee1084af8cb46ec405a1de38c5aeb9194bsewardj
111f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
112f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj/////////////////////////////////////////////////////////////////
113f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj/////////////////////////////////////////////////////////////////
114f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj//                                                             //
115ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj// data decls: SVal                                            //
116f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj//                                                             //
117f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj/////////////////////////////////////////////////////////////////
118f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj/////////////////////////////////////////////////////////////////
119f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
120f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardjtypedef  ULong  SVal;
121f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
122f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj/* This value has special significance to the implementation, and callers
123f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   may not store it in the shadow memory. */
124f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj#define SVal_INVALID (3ULL << 62)
125f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
126f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj/* This is the default value for shadow memory.  Initially the shadow
127f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   memory contains no accessible areas and so all reads produce this
128f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   value.  TODO: make this caller-defineable. */
129f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj#define SVal_NOACCESS (2ULL << 62)
130f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
131ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj
132ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj
133ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj/////////////////////////////////////////////////////////////////
134ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj/////////////////////////////////////////////////////////////////
135ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj//                                                             //
136ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj// data decls: ScalarTS                                        //
137ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj//                                                             //
138ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj/////////////////////////////////////////////////////////////////
139ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj/////////////////////////////////////////////////////////////////
140ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj
141ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj/* Scalar Timestamp.  We have to store a lot of these, so there is
142ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj   some effort to make them as small as possible.  Logically they are
143ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj   a pair, (Thr*, ULong), but that takes 16 bytes on a 64-bit target.
144ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj   We pack it into 64 bits by representing the Thr* using a ThrID, a
145ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj   small integer (18 bits), and a 46 bit integer for the timestamp
146ad4e979f408239dabbaae955d8ffcb84a51a5c85florian   number.  The 46/18 split is arbitrary, but has the effect that
147ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj   Helgrind can only handle programs that create 2^18 or fewer threads
148ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj   over their entire lifetime, and have no more than 2^46 timestamp
149ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj   ticks (synchronisation operations on the same thread).
150ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj
151ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj   This doesn't seem like much of a limitation.  2^46 ticks is
152ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj   7.06e+13, and if each tick (optimistically) takes the machine 1000
153ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj   cycles to process, then the minimum time to process that many ticks
154ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj   at a clock rate of 5 GHz is 162.9 days.  And that's doing nothing
155ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj   but VTS ticks, which isn't realistic.
156ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj
157328d6627c26471332610da3f5a0b9cc3cdd410c7philippe   NB1: SCALARTS_N_THRBITS must be 27 or lower.  The obvious limit is
158328d6627c26471332610da3f5a0b9cc3cdd410c7philippe   32 since a ThrID is a UInt.  27 comes from the fact that
159ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj   'Thr_n_RCEC', which records information about old accesses, packs
160328d6627c26471332610da3f5a0b9cc3cdd410c7philippe   in tsw not only a ThrID but also minimum 4+1 other bits (access size
161328d6627c26471332610da3f5a0b9cc3cdd410c7philippe   and writeness) in a UInt, hence limiting size to 32-(4+1) == 27.
162ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj
163ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj   NB2: thrid values are issued upwards from 1024, and values less
164ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj   than that aren't valid.  This isn't per se necessary (any order
165ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj   will do, so long as they are unique), but it does help ensure they
166ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj   are less likely to get confused with the various other kinds of
167328d6627c26471332610da3f5a0b9cc3cdd410c7philippe   small-integer thread ids drifting around (eg, TId).
168328d6627c26471332610da3f5a0b9cc3cdd410c7philippe   So, SCALARTS_N_THRBITS must be 11 or more.
169328d6627c26471332610da3f5a0b9cc3cdd410c7philippe   See also NB5.
170ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj
171ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj   NB3: this probably also relies on the fact that Thr's are never
172ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj   deallocated -- they exist forever.  Hence the 1-1 mapping from
173ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj   Thr's to thrid values (set up in Thr__new) persists forever.
174ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj
175ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj   NB4: temp_max_sized_VTS is allocated at startup and never freed.
176ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj   It is a maximum sized VTS, so has (1 << SCALARTS_N_TYMBITS)
177ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj   ScalarTSs.  So we can't make SCALARTS_N_THRBITS too large without
178ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj   making the memory use for this go sky-high.  With
179ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj   SCALARTS_N_THRBITS at 18, it occupies 2MB of memory, which seems
180ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj   like an OK tradeoff.  If more than 256k threads need to be
181ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj   supported, we could change SCALARTS_N_THRBITS to 20, which would
182ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj   facilitate supporting 1 million threads at the cost of 8MB storage
183ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj   for temp_max_sized_VTS.
184ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj
185ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj   NB5: the conflicting-map mechanism (Thr_n_RCEC, specifically) uses
186ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj   ThrID == 0 to denote an empty Thr_n_RCEC record.  So ThrID == 0
187ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj   must never be a valid ThrID.  Given NB2 that's OK.
188ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj*/
189328d6627c26471332610da3f5a0b9cc3cdd410c7philippe#define SCALARTS_N_THRBITS 18  /* valid range: 11 to 27 inclusive,
190328d6627c26471332610da3f5a0b9cc3cdd410c7philippe                                  See NB1 and NB2 above. */
191ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj
192ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj#define SCALARTS_N_TYMBITS (64 - SCALARTS_N_THRBITS)
193ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardjtypedef
194ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj   struct {
195ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj      ThrID thrid : SCALARTS_N_THRBITS;
196ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj      ULong tym   : SCALARTS_N_TYMBITS;
197ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj   }
198ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj   ScalarTS;
199ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj
200ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj#define ThrID_MAX_VALID ((1 << SCALARTS_N_THRBITS) - 1)
201ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj
202ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj
203ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////
//                                                             //
// data decls: Filter                                          //
//                                                             //
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////

/* Geometry of the filter: log2 of the number of bytes covered by one
   line, and log2 of the number of lines. */
// baseline: 5, 9
#define FI_LINE_SZB_LOG2  5
#define FI_NUM_LINES_LOG2 10

/* The same two quantities as plain counts: 32 bytes per line and 1024
   lines with the settings above. */
#define FI_LINE_SZB       (1 << FI_LINE_SZB_LOG2)
#define FI_NUM_LINES      (1 << FI_NUM_LINES_LOG2)

/* FI_GET_TAG clears the low FI_LINE_SZB_LOG2 bits of address _a, that
   is, rounds it down to a multiple of FI_LINE_SZB. */
#define FI_TAG_MASK        (~(Addr)(FI_LINE_SZB - 1))
#define FI_GET_TAG(_a)     ((_a) & FI_TAG_MASK)

/* FI_GET_LINENO discards the within-line offset bits of address _a and
   keeps the next FI_NUM_LINES_LOG2 bits, giving a value in the range
   0 .. FI_NUM_LINES-1. */
#define FI_GET_LINENO(_a)  ( ((_a) >> FI_LINE_SZB_LOG2) \
                             & (Addr)(FI_NUM_LINES-1) )


226ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj/* In the lines, each 8 bytes are treated individually, and are mapped
227ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj   to a UShort.  Regardless of endianness of the underlying machine,
228ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj   bits 1 and 0 pertain to the lowest address and bits 15 and 14 to
229ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj   the highest address.
230ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj
231ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj   Of each bit pair, the higher numbered bit is set if a R has been
232ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj   seen, so the actual layout is:
233ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj
234ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj   15 14             ...  01 00
235ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj
236ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj   R  W  for addr+7  ...  R  W  for addr+0
237ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj
238ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj   So a mask for the R-bits is 0xAAAA and for the W bits is 0x5555.
239ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj*/
240ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj
241ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj/* tags are separated from lines.  tags are Addrs and are
242ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj   the base address of the line. */
243ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardjtypedef
244ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj   struct {
245ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj      UShort u16s[FI_LINE_SZB / 8]; /* each UShort covers 8 bytes */
246ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj   }
247ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj   FiLine;
248ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj
249ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardjtypedef
250ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj   struct {
251ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj      Addr   tags[FI_NUM_LINES];
252ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj      FiLine lines[FI_NUM_LINES];
253ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj   }
254ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj   Filter;
255ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj
256ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj
257ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj
258ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj/////////////////////////////////////////////////////////////////
259ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj/////////////////////////////////////////////////////////////////
260ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj//                                                             //
261ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj// data decls: Thr, ULong_n_EC                                 //
262ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj//                                                             //
263ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj/////////////////////////////////////////////////////////////////
264ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj/////////////////////////////////////////////////////////////////
265ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj
266ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj// Records stacks for H1 history mechanism (DRD-style)
267ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardjtypedef
268ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj   struct { ULong ull; ExeContext* ec; }
269ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj   ULong_n_EC;
270ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj
271ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj
/* How many ULong_n_EC records to collect for each thread?  Older
   ones are dumped when we run out of space.  62.5k requires 1MB per
   thread, since each ULong_n_EC record is 16 bytes long.  When more
   than N_KWs_N_STACKs_PER_THREAD are present, the older half are
   deleted to make space.  Hence in the worst case we will be able to
   produce a stack at least for the last N_KWs_N_STACKs_PER_THREAD / 2
   Kw transitions (segments in this thread).  For the current setting
   that gives a guaranteed stack for at least the last 31.25k
   segments. */
#define N_KWs_N_STACKs_PER_THREAD 62500


284ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardjstruct _Thr {
285ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj   /* Current VTSs for this thread.  They change as we go along.  viR
286ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj      is the VTS to be used for reads, viW for writes.  Usually they
287ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj      are the same, but can differ when we deal with reader-writer
288ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj      locks.  It is always the case that
289ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj         VtsID__cmpLEQ(viW,viR) == True
290ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj      that is, viW must be the same, or lagging behind, viR. */
291ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj   VtsID viR;
292ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj   VtsID viW;
293ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj
294ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj   /* Is initially False, and is set to True after the thread really
295ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj      has done a low-level exit.  When True, we expect to never see
296ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj      any more memory references done by this thread. */
297ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj   Bool llexit_done;
298ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj
299ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj   /* Is initially False, and is set to True after the thread has been
300ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj      joined with (reaped by some other thread).  After this point, we
301ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj      do not expect to see any uses of .viR or .viW, so it is safe to
302ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj      set them to VtsID_INVALID. */
303ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj   Bool joinedwith_done;
304ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj
305ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj   /* A small integer giving a unique identity to this Thr.  See
306ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj      comments on the definition of ScalarTS for details. */
307ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj   ThrID thrid : SCALARTS_N_THRBITS;
308ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj
309ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj   /* A filter that removes references for which we believe that
310ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj      msmcread/msmcwrite will not change the state, nor report a
311ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj      race. */
312ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj   Filter* filter;
313ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj
314ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj   /* A pointer back to the top level Thread structure.  There is a
315ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj      1-1 mapping between Thread and Thr structures -- each Thr points
316ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj      at its corresponding Thread, and vice versa.  Really, Thr and
317ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj      Thread should be merged into a single structure. */
318ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj   Thread* hgthread;
319ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj
320ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj   /* The ULongs (scalar Kws) in this accumulate in strictly
321ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj      increasing order, without duplicates.  This is important because
322ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj      we need to be able to find a given scalar Kw in this array
323ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj      later, by binary search. */
324ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj   XArray* /* ULong_n_EC */ local_Kws_n_stacks;
325ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj};
326ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj
327ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj
328ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj
329ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj/////////////////////////////////////////////////////////////////
330ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj/////////////////////////////////////////////////////////////////
331ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj//                                                             //
332ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj// data decls: SO                                              //
333ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj//                                                             //
334ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj/////////////////////////////////////////////////////////////////
335ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj/////////////////////////////////////////////////////////////////
336ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj
337ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj// (UInt) `echo "Synchronisation object" | md5sum`
338ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj#define SO_MAGIC 0x56b3c5b0U
339ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj
340ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardjstruct _SO {
341ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj   struct _SO* admin_prev;
342ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj   struct _SO* admin_next;
343ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj   VtsID viR; /* r-clock of sender */
344ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj   VtsID viW; /* w-clock of sender */
345ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj   UInt  magic;
346ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj};
347ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj
348ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj
349ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj
350ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj/////////////////////////////////////////////////////////////////
351ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj/////////////////////////////////////////////////////////////////
352ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj//                                                             //
353ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj// Forward declarations                                        //
354ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj//                                                             //
355ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj/////////////////////////////////////////////////////////////////
356ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj/////////////////////////////////////////////////////////////////
357ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj
358ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj/* fwds for
359ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj   Globals needed by other parts of the library.  These are set
360ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj   once at startup and then never changed. */
361ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardjstatic void        (*main_get_stacktrace)( Thr*, Addr*, UWord ) = NULL;
362ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardjstatic ExeContext* (*main_get_EC)( Thr* ) = NULL;
363ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj
364ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj/* misc fn and data fwdses */
365ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardjstatic void VtsID__rcinc ( VtsID ii );
366ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardjstatic void VtsID__rcdec ( VtsID ii );
367ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj
368ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardjstatic inline Bool SVal__isC ( SVal s );
369ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardjstatic inline VtsID SVal__unC_Rmin ( SVal s );
370ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardjstatic inline VtsID SVal__unC_Wmin ( SVal s );
371ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardjstatic inline SVal SVal__mkC ( VtsID rmini, VtsID wmini );
3721475a7ffc4f60e78d402ba2572f1624cebbbb76aphilippestatic inline void SVal__rcinc ( SVal s );
3731475a7ffc4f60e78d402ba2572f1624cebbbb76aphilippestatic inline void SVal__rcdec ( SVal s );
37471ed3c9382fa816a84731e9a2ddb9eda7d5624a6philippe/* SVal in LineZ are used to store various pointers. */
37571ed3c9382fa816a84731e9a2ddb9eda7d5624a6philippestatic inline void *SVal2Ptr (SVal s);
37671ed3c9382fa816a84731e9a2ddb9eda7d5624a6philippestatic inline SVal Ptr2SVal (void* ptr);
377ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj
378ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj/* A doubly linked list of all the SO's. */
379ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardjSO* admin_SO;
380ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj
381ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj
382ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj
383ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj/////////////////////////////////////////////////////////////////
384ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj/////////////////////////////////////////////////////////////////
385ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj//                                                             //
386ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj// SECTION BEGIN compressed shadow memory                      //
387ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj//                                                             //
388ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj/////////////////////////////////////////////////////////////////
389ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj/////////////////////////////////////////////////////////////////
390ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj
391ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj#ifndef __HB_ZSM_H
392ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj#define __HB_ZSM_H
393ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj
394f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj/* Initialise the library.  Once initialised, it will (or may) call
3951475a7ffc4f60e78d402ba2572f1624cebbbb76aphilippe   SVal__rcinc and SVal__rcdec in response to all the calls below, in order to
396f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   allow the user to do reference counting on the SVals stored herein.
397f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   It is important to understand, however, that due to internal
398f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   caching, the reference counts are in general inaccurate, and can be
399f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   both above or below the true reference count for an item.  In
400f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   particular, the library may indicate that the reference count for
401f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   an item is zero, when in fact it is not.
402f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
403f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   To make the reference counting exact and therefore non-pointless,
404f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   call zsm_flush_cache.  Immediately after it returns, the reference
405f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   counts for all items, as deduced by the caller by observing calls
4061475a7ffc4f60e78d402ba2572f1624cebbbb76aphilippe   to SVal__rcinc and SVal__rcdec, will be correct, and so any items with a
4071475a7ffc4f60e78d402ba2572f1624cebbbb76aphilippe   zero reference count may be freed (or at least considered to be
408f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   unreferenced by this library).
409f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj*/
4101475a7ffc4f60e78d402ba2572f1624cebbbb76aphilippestatic void zsm_init ( void );
411f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
41223f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardjstatic void zsm_sset_range  ( Addr, SizeT, SVal );
413f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippestatic void zsm_sset_range_SMALL ( Addr a, SizeT len, SVal svNew );
41423f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardjstatic void zsm_scopy_range ( Addr, Addr, SizeT );
415f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardjstatic void zsm_flush_cache ( void );
416f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
417f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj#endif /* ! __HB_ZSM_H */
418f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
419f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
/* Round a up to the next multiple of N (a is returned unchanged if it
   is already a multiple).  N must be a power of 2.
   Both arguments are fully parenthesised so that expression arguments,
   e.g. ROUNDUP(x, 1 << k) or ROUNDUP(p | q, 8), expand with the
   intended precedence.  N is evaluated twice, so it must not have side
   effects. */
#define ROUNDUP(a, N)   (((a) + (N) - 1) & ~((N) - 1))
/* Round a down to the nearest multiple of N that is <= a.  N must be a
   power of 2.  N is parenthesised for the same precedence reason as in
   ROUNDUP. */
#define ROUNDDN(a, N)   ((a) & ~((N) - 1))
424f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
425f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe/* True if a belongs in range [start, start + szB[
426f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe   (i.e. start + szB is excluded). */
427f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippestatic inline Bool address_in_range (Addr a, Addr start,  SizeT szB)
428f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe{
429f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe   /* Checking start <= a && a < start + szB.
430f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe      As start and a are unsigned addresses, the condition can
431f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe      be simplified. */
432f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe   if (CHECK_ZSM)
433f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe      tl_assert ((a - start < szB)
434f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe                 == (start <= a
435f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe                     &&       a < start + szB));
436f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe   return a - start < szB;
437f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe}
438f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
439f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj/* ------ CacheLine ------ */
440f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
441f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj#define N_LINE_BITS      6 /* must be >= 3 */
442f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj#define N_LINE_ARANGE    (1 << N_LINE_BITS)
443f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj#define N_LINE_TREES     (N_LINE_ARANGE >> 3)
444f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
/* One line in its expanded (cache-resident) form: N_LINE_ARANGE SVal
   slots, viewed as N_LINE_TREES groups of 8 slots, plus one 16-bit
   descriptor per group.
   NOTE(review): the descriptors presumably hold the TREE_DESCR_* bit
   flags (which occupy bits 0..15) -- confirm against the code that
   reads and writes descrs[]. */
445f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardjtypedef
446f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   struct {
447f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      UShort descrs[N_LINE_TREES];
448f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      SVal   svals[N_LINE_ARANGE]; // == N_LINE_TREES * 8
449f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   }
450f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   CacheLine;
451f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
452f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj#define TREE_DESCR_16_0 (1<<0)
453f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj#define TREE_DESCR_32_0 (1<<1)
454f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj#define TREE_DESCR_16_1 (1<<2)
455f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj#define TREE_DESCR_64   (1<<3)
456f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj#define TREE_DESCR_16_2 (1<<4)
457f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj#define TREE_DESCR_32_1 (1<<5)
458f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj#define TREE_DESCR_16_3 (1<<6)
459f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj#define TREE_DESCR_8_0  (1<<7)
460f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj#define TREE_DESCR_8_1  (1<<8)
461f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj#define TREE_DESCR_8_2  (1<<9)
462f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj#define TREE_DESCR_8_3  (1<<10)
463f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj#define TREE_DESCR_8_4  (1<<11)
464f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj#define TREE_DESCR_8_5  (1<<12)
465f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj#define TREE_DESCR_8_6  (1<<13)
466f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj#define TREE_DESCR_8_7  (1<<14)
467f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj#define TREE_DESCR_DTY  (1<<15)
468f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
469f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardjtypedef
470f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   struct {
471f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      SVal  dict[4]; /* can represent up to 4 diff values in the line */
472f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      UChar ix2s[N_LINE_ARANGE/4]; /* array of N_LINE_ARANGE 2-bit
473f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj                                      dict indexes */
47471ed3c9382fa816a84731e9a2ddb9eda7d5624a6philippe      /* if dict[0] == SVal_INVALID then dict[1] is a pointer to the
475f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj         LineF to use, and dict[2..] are also SVal_INVALID. */
476f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   }
477f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   LineZ; /* compressed rep for a cache line */
478f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
47971ed3c9382fa816a84731e9a2ddb9eda7d5624a6philippe/* LineZ.dict[1] is used to store various pointers:
48071ed3c9382fa816a84731e9a2ddb9eda7d5624a6philippe   * In the first lineZ of a free SecMap, it points to the next free SecMap.
48171ed3c9382fa816a84731e9a2ddb9eda7d5624a6philippe   * In a lineZ for which we need to use a lineF, it points to the lineF. */
48271ed3c9382fa816a84731e9a2ddb9eda7d5624a6philippe
48371ed3c9382fa816a84731e9a2ddb9eda7d5624a6philippe
484f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardjtypedef
485f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   struct {
486f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      SVal w64s[N_LINE_ARANGE];
487f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   }
488f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   LineF; /* full rep for a cache line */
489f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
49071ed3c9382fa816a84731e9a2ddb9eda7d5624a6philippe/* We use a pool allocator for LineF, as LineF is relatively small,
49171ed3c9382fa816a84731e9a2ddb9eda7d5624a6philippe   and we will often alloc/release such lines. */
49271ed3c9382fa816a84731e9a2ddb9eda7d5624a6philippestatic PoolAlloc* LineF_pool_allocator;
49371ed3c9382fa816a84731e9a2ddb9eda7d5624a6philippe
49471ed3c9382fa816a84731e9a2ddb9eda7d5624a6philippe/* SVal in a lineZ are used to store various pointers.
49571ed3c9382fa816a84731e9a2ddb9eda7d5624a6philippe   Below are conversion functions to support that. */
49671ed3c9382fa816a84731e9a2ddb9eda7d5624a6philippestatic inline LineF *LineF_Ptr (LineZ *lineZ)
49771ed3c9382fa816a84731e9a2ddb9eda7d5624a6philippe{
49871ed3c9382fa816a84731e9a2ddb9eda7d5624a6philippe   tl_assert(lineZ->dict[0] == SVal_INVALID);
49971ed3c9382fa816a84731e9a2ddb9eda7d5624a6philippe   return SVal2Ptr (lineZ->dict[1]);
50071ed3c9382fa816a84731e9a2ddb9eda7d5624a6philippe}
50171ed3c9382fa816a84731e9a2ddb9eda7d5624a6philippe
502f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj/* Shadow memory.
503f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   Primary map is a WordFM Addr SecMap*.
504f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   SecMaps cover some page-size-ish section of address space and hold
505f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj     a compressed representation.
506f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   CacheLine-sized chunks of SecMaps are copied into a Cache, being
507f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   decompressed when moved into the cache and recompressed on the
508f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   way out.  Because of this, the cache must operate as a writeback
509f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   cache, not a writethrough one.
510f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
511f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   Each SecMap must hold a power-of-2 number of CacheLines.  Hence
512f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   N_SECMAP_BITS must be >= N_LINE_BITS.
513f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj*/
514f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj#define N_SECMAP_BITS   13
515f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj#define N_SECMAP_ARANGE (1 << N_SECMAP_BITS)
516f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
517f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj// # CacheLines held by a SecMap
518f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj#define N_SECMAP_ZLINES (N_SECMAP_ARANGE / N_LINE_ARANGE)
519f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
520f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj/* The data in the SecMap is held in the array of LineZs.  Each LineZ
521f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   either carries the required data directly, in a compressed
52271ed3c9382fa816a84731e9a2ddb9eda7d5624a6philippe   representation, or it holds (in .dict[1]) a pointer to a LineF
52371ed3c9382fa816a84731e9a2ddb9eda7d5624a6philippe   that holds the full representation.
524f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
52571ed3c9382fa816a84731e9a2ddb9eda7d5624a6philippe   As each in-use LineF is referred to by exactly one LineZ,
52671ed3c9382fa816a84731e9a2ddb9eda7d5624a6philippe   the number of .linesZ[] that refer to a lineF should equal
52771ed3c9382fa816a84731e9a2ddb9eda7d5624a6philippe   the number of used lineF.
528f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
529f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   RC obligations: the RCs presented to the user include exactly
530f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   the values in:
531f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   * direct Z reps, that is, ones for which .dict[0] != SVal_INVALID
53271ed3c9382fa816a84731e9a2ddb9eda7d5624a6philippe   * F reps that are in use
533f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
534f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   Hence the following actions at the following transitions are required:
535f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
53671ed3c9382fa816a84731e9a2ddb9eda7d5624a6philippe   F rep: alloc'd       -> freed                -- rcdec_LineF
53771ed3c9382fa816a84731e9a2ddb9eda7d5624a6philippe   F rep:               -> alloc'd              -- rcinc_LineF
538f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   Z rep: .dict[0] from other to SVal_INVALID   -- rcdec_LineZ
539f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   Z rep: .dict[0] from SVal_INVALID to other   -- rcinc_LineZ
540f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj*/
54171ed3c9382fa816a84731e9a2ddb9eda7d5624a6philippe
/* A secondary map: the shadow state for N_SECMAP_ARANGE bytes of
   address space, stored as N_SECMAP_ZLINES LineZ entries. */
542f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardjtypedef
543f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   struct {
      /* SecMap_MAGIC is what is_sane_SecMap accepts;
         push_SecMap_on_freelist switches it to SecMap_free_MAGIC. */
544f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      UInt   magic;
      /* While the SecMap is on the free list, linesZ[0].dict[1] holds
         the pointer to the next free SecMap. */
545f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      LineZ  linesZ[N_SECMAP_ZLINES];
546f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   }
547f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   SecMap;
548f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
549f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj#define SecMap_MAGIC   0x571e58cbU
550f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
551f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe// (UInt) `echo "Free SecMap" | md5sum`
552f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe#define SecMap_free_MAGIC 0x5a977f30U
553f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe
5545aa09bf5e5e9ab4cd24689f80937a9268b7aaebasewardj__attribute__((unused))
555f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardjstatic inline Bool is_sane_SecMap ( SecMap* sm ) {
556f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   return sm != NULL && sm->magic == SecMap_MAGIC;
557f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj}
558f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
559f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj/* ------ Cache ------ */
560f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
561f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj#define N_WAY_BITS 16
562f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj#define N_WAY_NENT (1 << N_WAY_BITS)
563f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
564f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj/* Each tag is the address of the associated CacheLine, rounded down
565f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   to a CacheLine address boundary.  A CacheLine size must be a power
566f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   of 2 and must be 8 or more.  Hence an easy way to initialise the
567f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   cache so it is empty is to set all the tag values to any value % 8
568f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   != 0, eg 1.  This means all queries in the cache initially miss.
569f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   It does however require us to detect and not writeback, any line
570f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   with a bogus tag. */
571f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardjtypedef
572f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   struct {
573f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      CacheLine lyns0[N_WAY_NENT];
574f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      Addr      tags0[N_WAY_NENT];
575f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   }
576f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   Cache;
577f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
578f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardjstatic inline Bool is_valid_scache_tag ( Addr tag ) {
579f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   /* a valid tag should be naturally aligned to the start of
580f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      a CacheLine. */
581f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   return 0 == (tag & (N_LINE_ARANGE - 1));
582f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj}
583f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
584f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
585f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj/* --------- Primary data structures --------- */
586f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
587f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj/* Shadow memory primary map */
588f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardjstatic WordFM* map_shmem = NULL; /* WordFM Addr SecMap* */
589f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardjstatic Cache   cache_shmem;
590f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
591f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
592f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardjstatic UWord stats__secmaps_search       = 0; // # SM finds
593f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardjstatic UWord stats__secmaps_search_slow  = 0; // # SM lookupFMs
594f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardjstatic UWord stats__secmaps_allocd       = 0; // # SecMaps issued
595f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippestatic UWord stats__secmaps_in_map_shmem = 0; // # SecMaps 'live'
596f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippestatic UWord stats__secmaps_scanGC       = 0; // # nr of scan GC done.
597f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippestatic UWord stats__secmaps_scanGCed     = 0; // # SecMaps GC-ed via scan
598f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippestatic UWord stats__secmaps_ssetGCed     = 0; // # SecMaps GC-ed via setnoaccess
599f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardjstatic UWord stats__secmap_ga_space_covered = 0; // # ga bytes covered
600f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardjstatic UWord stats__secmap_linesZ_allocd = 0; // # LineZ's issued
601f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardjstatic UWord stats__secmap_linesZ_bytes  = 0; // .. using this much storage
602f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardjstatic UWord stats__cache_Z_fetches      = 0; // # Z lines fetched
603f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardjstatic UWord stats__cache_Z_wbacks       = 0; // # Z lines written back
604f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardjstatic UWord stats__cache_F_fetches      = 0; // # F lines fetched
605f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardjstatic UWord stats__cache_F_wbacks       = 0; // # F lines written back
606f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippestatic UWord stats__cache_flushes_invals = 0; // # cache flushes and invals
607f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardjstatic UWord stats__cache_totrefs        = 0; // # total accesses
608f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardjstatic UWord stats__cache_totmisses      = 0; // # misses
609f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardjstatic ULong stats__cache_make_New_arange = 0; // total arange made New
610f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardjstatic ULong stats__cache_make_New_inZrep = 0; // arange New'd on Z reps
611f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardjstatic UWord stats__cline_normalises     = 0; // # calls to cacheline_normalise
61223f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardjstatic UWord stats__cline_cread64s       = 0; // # calls to s_m_read64
61323f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardjstatic UWord stats__cline_cread32s       = 0; // # calls to s_m_read32
61423f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardjstatic UWord stats__cline_cread16s       = 0; // # calls to s_m_read16
61523f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardjstatic UWord stats__cline_cread08s       = 0; // # calls to s_m_read8
61623f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardjstatic UWord stats__cline_cwrite64s      = 0; // # calls to s_m_write64
61723f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardjstatic UWord stats__cline_cwrite32s      = 0; // # calls to s_m_write32
61823f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardjstatic UWord stats__cline_cwrite16s      = 0; // # calls to s_m_write16
61923f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardjstatic UWord stats__cline_cwrite08s      = 0; // # calls to s_m_write8
/* NOTE(review): the labels on the five counters below used to
   contradict the counter names (the read counter said set8 and every
   write counter said get8).  They now follow the counter names; the
   s_m_* function names are inferred by analogy with the neighbouring
   comments -- confirm against the code that increments each counter. */
62023f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardjstatic UWord stats__cline_sread08s       = 0; // # calls to s_m_get8
62123f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardjstatic UWord stats__cline_swrite08s      = 0; // # calls to s_m_set8
62223f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardjstatic UWord stats__cline_swrite16s      = 0; // # calls to s_m_set16
62323f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardjstatic UWord stats__cline_swrite32s      = 0; // # calls to s_m_set32
62423f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardjstatic UWord stats__cline_swrite64s      = 0; // # calls to s_m_set64
62523f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardjstatic UWord stats__cline_scopy08s       = 0; // # calls to s_m_copy8
626f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardjstatic UWord stats__cline_64to32splits   = 0; // # 64-bit accesses split
627f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardjstatic UWord stats__cline_32to16splits   = 0; // # 32-bit accesses split
628f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardjstatic UWord stats__cline_16to8splits    = 0; // # 16-bit accesses split
629f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardjstatic UWord stats__cline_64to32pulldown = 0; // # calls to pulldown_to_32
630f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardjstatic UWord stats__cline_32to16pulldown = 0; // # calls to pulldown_to_16
631f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardjstatic UWord stats__cline_16to8pulldown  = 0; // # calls to pulldown_to_8
632c8028add6294793dfc80a80d920c7dba3a89f312sewardjstatic UWord stats__vts__tick            = 0; // # calls to VTS__tick
633c8028add6294793dfc80a80d920c7dba3a89f312sewardjstatic UWord stats__vts__join            = 0; // # calls to VTS__join
634c8028add6294793dfc80a80d920c7dba3a89f312sewardjstatic UWord stats__vts__cmpLEQ          = 0; // # calls to VTS__cmpLEQ
635c8028add6294793dfc80a80d920c7dba3a89f312sewardjstatic UWord stats__vts__cmp_structural  = 0; // # calls to VTS__cmp_structural
636f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippestatic UWord stats__vts_tab_GC           = 0; // # nr of vts_tab GC
6372bd2326e8361a752dfbd4eced9a61b6224f05272philippestatic UWord stats__vts_pruning          = 0; // # nr of vts pruning
6387aa38a97a2a49163e465a7c85b8b9469f5c7d8e2sewardj
6397aa38a97a2a49163e465a7c85b8b9469f5c7d8e2sewardj// # calls to VTS__cmp_structural w/ slow case
6407aa38a97a2a49163e465a7c85b8b9469f5c7d8e2sewardjstatic UWord stats__vts__cmp_structural_slow = 0;
6417aa38a97a2a49163e465a7c85b8b9469f5c7d8e2sewardj
6427aa38a97a2a49163e465a7c85b8b9469f5c7d8e2sewardj// # calls to VTS__indexAt_SLOW
6437aa38a97a2a49163e465a7c85b8b9469f5c7d8e2sewardjstatic UWord stats__vts__indexat_slow = 0;
6447aa38a97a2a49163e465a7c85b8b9469f5c7d8e2sewardj
6457aa38a97a2a49163e465a7c85b8b9469f5c7d8e2sewardj// # calls to vts_set__find__or__clone_and_add
6467aa38a97a2a49163e465a7c85b8b9469f5c7d8e2sewardjstatic UWord stats__vts_set__focaa    = 0;
6477aa38a97a2a49163e465a7c85b8b9469f5c7d8e2sewardj
6487aa38a97a2a49163e465a7c85b8b9469f5c7d8e2sewardj// # calls to vts_set__find__or__clone_and_add that lead to an
6497aa38a97a2a49163e465a7c85b8b9469f5c7d8e2sewardj// allocation
6507aa38a97a2a49163e465a7c85b8b9469f5c7d8e2sewardjstatic UWord stats__vts_set__focaa_a  = 0;
651c8028add6294793dfc80a80d920c7dba3a89f312sewardj
652f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
653f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardjstatic inline Addr shmem__round_to_SecMap_base ( Addr a ) {
654f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   return a & ~(N_SECMAP_ARANGE - 1);
655f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj}
656f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardjstatic inline UWord shmem__get_SecMap_offset ( Addr a ) {
657f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   return a & (N_SECMAP_ARANGE - 1);
658f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj}
659f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
660f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
661f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj/*----------------------------------------------------------------*/
662f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj/*--- map_shmem :: WordFM Addr SecMap                          ---*/
663f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj/*--- shadow memory (low level handlers) (shmem__* fns)        ---*/
664f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj/*----------------------------------------------------------------*/
665f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
666f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj/*--------------- SecMap allocation --------------- */
667f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
668f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardjstatic HChar* shmem__bigchunk_next = NULL;
669f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardjstatic HChar* shmem__bigchunk_end1 = NULL;
670f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
671f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardjstatic void* shmem__bigchunk_alloc ( SizeT n )
672f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj{
673f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   const SizeT sHMEM__BIGCHUNK_SIZE = 4096 * 256 * 4;
674f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   tl_assert(n > 0);
675f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   n = VG_ROUNDUP(n, 16);
676f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   tl_assert(shmem__bigchunk_next <= shmem__bigchunk_end1);
677f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   tl_assert(shmem__bigchunk_end1 - shmem__bigchunk_next
678f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj             <= (SSizeT)sHMEM__BIGCHUNK_SIZE);
679f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   if (shmem__bigchunk_next + n > shmem__bigchunk_end1) {
680f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      if (0)
681f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      VG_(printf)("XXXXX bigchunk: abandoning %d bytes\n",
682f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj                  (Int)(shmem__bigchunk_end1 - shmem__bigchunk_next));
683f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      shmem__bigchunk_next = VG_(am_shadow_alloc)( sHMEM__BIGCHUNK_SIZE );
684f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      if (shmem__bigchunk_next == NULL)
685f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj         VG_(out_of_memory_NORETURN)(
686f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj            "helgrind:shmem__bigchunk_alloc", sHMEM__BIGCHUNK_SIZE );
687f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      shmem__bigchunk_end1 = shmem__bigchunk_next + sHMEM__BIGCHUNK_SIZE;
688f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   }
689f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   tl_assert(shmem__bigchunk_next);
690f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   tl_assert( 0 == (((Addr)shmem__bigchunk_next) & (16-1)) );
691f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   tl_assert(shmem__bigchunk_next + n <= shmem__bigchunk_end1);
692f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   shmem__bigchunk_next += n;
693f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   return shmem__bigchunk_next - n;
694f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj}
695f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
696f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe/* SecMap changed to be fully SVal_NOACCESS are inserted in a list of
697f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe   recycled SecMap. When a new SecMap is needed, a recycled SecMap
698f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe   will be used in preference to allocating a new SecMap. */
69971ed3c9382fa816a84731e9a2ddb9eda7d5624a6philippe/* We make a linked list of SecMap. The first LineZ is re-used to
70071ed3c9382fa816a84731e9a2ddb9eda7d5624a6philippe   implement the linked list. */
70171ed3c9382fa816a84731e9a2ddb9eda7d5624a6philippe/* Returns the SecMap following sm in the free list.
70271ed3c9382fa816a84731e9a2ddb9eda7d5624a6philippe   NULL if sm is the last SecMap. sm must be on the free list. */
70371ed3c9382fa816a84731e9a2ddb9eda7d5624a6philippestatic inline SecMap *SecMap_freelist_next ( SecMap* sm )
70471ed3c9382fa816a84731e9a2ddb9eda7d5624a6philippe{
70571ed3c9382fa816a84731e9a2ddb9eda7d5624a6philippe   tl_assert (sm);
70671ed3c9382fa816a84731e9a2ddb9eda7d5624a6philippe   tl_assert (sm->magic == SecMap_free_MAGIC);
70771ed3c9382fa816a84731e9a2ddb9eda7d5624a6philippe   return SVal2Ptr (sm->linesZ[0].dict[1]);
70871ed3c9382fa816a84731e9a2ddb9eda7d5624a6philippe}
70971ed3c9382fa816a84731e9a2ddb9eda7d5624a6philippestatic inline void set_SecMap_freelist_next ( SecMap* sm, SecMap* next )
71071ed3c9382fa816a84731e9a2ddb9eda7d5624a6philippe{
71171ed3c9382fa816a84731e9a2ddb9eda7d5624a6philippe   tl_assert (sm);
71271ed3c9382fa816a84731e9a2ddb9eda7d5624a6philippe   tl_assert (sm->magic == SecMap_free_MAGIC);
71371ed3c9382fa816a84731e9a2ddb9eda7d5624a6philippe   tl_assert (next == NULL || next->magic == SecMap_free_MAGIC);
71471ed3c9382fa816a84731e9a2ddb9eda7d5624a6philippe   sm->linesZ[0].dict[1] = Ptr2SVal (next);
71571ed3c9382fa816a84731e9a2ddb9eda7d5624a6philippe}
71671ed3c9382fa816a84731e9a2ddb9eda7d5624a6philippe
717f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippestatic SecMap *SecMap_freelist = NULL;
718f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippestatic UWord SecMap_freelist_length(void)
719f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe{
720f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe   SecMap *sm;
721f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe   UWord n = 0;
722f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe
723f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe   sm = SecMap_freelist;
724f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe   while (sm) {
725f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe     n++;
72671ed3c9382fa816a84731e9a2ddb9eda7d5624a6philippe     sm = SecMap_freelist_next (sm);
727f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe   }
728f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe   return n;
729f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe}
730f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe
731f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippestatic void push_SecMap_on_freelist(SecMap* sm)
732f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe{
733f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe   if (0) VG_(message)(Vg_DebugMsg, "%p push\n", sm);
734f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe   sm->magic = SecMap_free_MAGIC;
73571ed3c9382fa816a84731e9a2ddb9eda7d5624a6philippe   set_SecMap_freelist_next(sm, SecMap_freelist);
736f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe   SecMap_freelist = sm;
737f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe}
738f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe/* Returns a free SecMap if there is one.
739f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe   Otherwise, returns NULL. */
740f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippestatic SecMap *pop_SecMap_from_freelist(void)
741f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe{
742f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe   SecMap *sm;
743f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe
744f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe   sm = SecMap_freelist;
745f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe   if (sm) {
746f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe      tl_assert (sm->magic == SecMap_free_MAGIC);
74771ed3c9382fa816a84731e9a2ddb9eda7d5624a6philippe      SecMap_freelist = SecMap_freelist_next (sm);
748f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe      if (0) VG_(message)(Vg_DebugMsg, "%p pop\n", sm);
749f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe   }
750f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe   return sm;
751f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe}
752f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe
753f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippestatic SecMap* shmem__alloc_or_recycle_SecMap ( void )
754f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj{
755f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   Word    i, j;
756f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe   SecMap* sm = pop_SecMap_from_freelist();
757f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe
758f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe   if (!sm) {
759f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe      sm = shmem__bigchunk_alloc( sizeof(SecMap) );
760f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe      stats__secmaps_allocd++;
761f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe      stats__secmap_ga_space_covered += N_SECMAP_ARANGE;
762f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe      stats__secmap_linesZ_allocd += N_SECMAP_ZLINES;
763f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe      stats__secmap_linesZ_bytes += N_SECMAP_ZLINES * sizeof(LineZ);
764f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe   }
765f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   if (0) VG_(printf)("alloc_SecMap %p\n",sm);
766f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   tl_assert(sm);
767f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   sm->magic = SecMap_MAGIC;
768f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   for (i = 0; i < N_SECMAP_ZLINES; i++) {
769f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      sm->linesZ[i].dict[0] = SVal_NOACCESS;
770f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      sm->linesZ[i].dict[1] = SVal_INVALID;
771f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      sm->linesZ[i].dict[2] = SVal_INVALID;
772f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      sm->linesZ[i].dict[3] = SVal_INVALID;
773f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      for (j = 0; j < N_LINE_ARANGE/4; j++)
774f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj         sm->linesZ[i].ix2s[j] = 0; /* all reference dict[0] */
775f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   }
776f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   return sm;
777f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj}
778f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
779f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardjtypedef struct { Addr gaKey; SecMap* sm; } SMCacheEnt;
780f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardjstatic SMCacheEnt smCache[3] = { {1,NULL}, {1,NULL}, {1,NULL} };
781f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
782f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardjstatic SecMap* shmem__find_SecMap ( Addr ga )
783f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj{
784f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   SecMap* sm    = NULL;
785f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   Addr    gaKey = shmem__round_to_SecMap_base(ga);
786f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   // Cache
787f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   stats__secmaps_search++;
788f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   if (LIKELY(gaKey == smCache[0].gaKey))
789f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      return smCache[0].sm;
790f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   if (LIKELY(gaKey == smCache[1].gaKey)) {
791f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      SMCacheEnt tmp = smCache[0];
792f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      smCache[0] = smCache[1];
793f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      smCache[1] = tmp;
794f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      return smCache[0].sm;
795f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   }
796f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   if (gaKey == smCache[2].gaKey) {
797f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      SMCacheEnt tmp = smCache[1];
798f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      smCache[1] = smCache[2];
799f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      smCache[2] = tmp;
800f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      return smCache[1].sm;
801f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   }
802f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   // end Cache
803f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   stats__secmaps_search_slow++;
804f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   if (VG_(lookupFM)( map_shmem,
805f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj                      NULL/*keyP*/, (UWord*)&sm, (UWord)gaKey )) {
806f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      tl_assert(sm != NULL);
807f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      smCache[2] = smCache[1];
808f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      smCache[1] = smCache[0];
809f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      smCache[0].gaKey = gaKey;
810f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      smCache[0].sm    = sm;
811f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   } else {
812f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      tl_assert(sm == NULL);
813f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   }
814f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   return sm;
815f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj}
816f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
817f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe/* Scan the SecMap and count the SecMap that can be GC-ed.
818f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe   If really, really does the GC of the SecMap. */
819f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe/* NOT TO BE CALLED FROM WITHIN libzsm. */
820f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippestatic UWord next_SecMap_GC_at = 1000;
821f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe__attribute__((noinline))
822f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippestatic UWord shmem__SecMap_do_GC(Bool really)
823f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe{
824f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe   UWord secmapW = 0;
825f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe   Addr  gaKey;
826f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe   UWord examined = 0;
827f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe   UWord ok_GCed = 0;
828f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe
829f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe   /* First invalidate the smCache */
830f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe   smCache[0].gaKey = 1;
831f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe   smCache[1].gaKey = 1;
832f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe   smCache[2].gaKey = 1;
833f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe   STATIC_ASSERT (3 == sizeof(smCache)/sizeof(smCache[0]));
834f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe
835f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe   VG_(initIterFM)( map_shmem );
836f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe   while (VG_(nextIterFM)( map_shmem, &gaKey, &secmapW )) {
837f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe      UWord   i;
838f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe      UWord   j;
83971ed3c9382fa816a84731e9a2ddb9eda7d5624a6philippe      UWord   n_linesF = 0;
840f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe      SecMap* sm = (SecMap*)secmapW;
841f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe      tl_assert(sm->magic == SecMap_MAGIC);
842f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe      Bool ok_to_GC = True;
843f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe
844f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe      examined++;
845f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe
84671ed3c9382fa816a84731e9a2ddb9eda7d5624a6philippe      /* Deal with the LineZs and the possible LineF of a LineZ. */
847f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe      for (i = 0; i < N_SECMAP_ZLINES && ok_to_GC; i++) {
848f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe         LineZ* lineZ = &sm->linesZ[i];
84971ed3c9382fa816a84731e9a2ddb9eda7d5624a6philippe         if (lineZ->dict[0] != SVal_INVALID) {
85071ed3c9382fa816a84731e9a2ddb9eda7d5624a6philippe            ok_to_GC = lineZ->dict[0] == SVal_NOACCESS
851f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe               && !SVal__isC (lineZ->dict[1])
852f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe               && !SVal__isC (lineZ->dict[2])
85371ed3c9382fa816a84731e9a2ddb9eda7d5624a6philippe               && !SVal__isC (lineZ->dict[3]);
85471ed3c9382fa816a84731e9a2ddb9eda7d5624a6philippe         } else {
85571ed3c9382fa816a84731e9a2ddb9eda7d5624a6philippe            LineF *lineF = LineF_Ptr(lineZ);
85671ed3c9382fa816a84731e9a2ddb9eda7d5624a6philippe            n_linesF++;
85771ed3c9382fa816a84731e9a2ddb9eda7d5624a6philippe            for (j = 0; j < N_LINE_ARANGE && ok_to_GC; j++)
85871ed3c9382fa816a84731e9a2ddb9eda7d5624a6philippe               ok_to_GC = lineF->w64s[j] == SVal_NOACCESS;
85971ed3c9382fa816a84731e9a2ddb9eda7d5624a6philippe         }
860f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe      }
861f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe      if (ok_to_GC)
862f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe         ok_GCed++;
863f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe      if (ok_to_GC && really) {
864f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe        SecMap *fm_sm;
865f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe        Addr fm_gaKey;
866f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe        /* We cannot remove a SecMap from map_shmem while iterating.
867f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe           So, stop iteration, remove from map_shmem, recreate the iteration
868f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe           on the next SecMap. */
869f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe        VG_(doneIterFM) ( map_shmem );
87071ed3c9382fa816a84731e9a2ddb9eda7d5624a6philippe        /* No need to rcdec linesZ or linesF, these are all SVal_NOACCESS.
87171ed3c9382fa816a84731e9a2ddb9eda7d5624a6philippe           We just need to free the lineF referenced by the linesZ. */
87271ed3c9382fa816a84731e9a2ddb9eda7d5624a6philippe        if (n_linesF > 0) {
87371ed3c9382fa816a84731e9a2ddb9eda7d5624a6philippe           for (i = 0; i < N_SECMAP_ZLINES && n_linesF > 0; i++) {
87471ed3c9382fa816a84731e9a2ddb9eda7d5624a6philippe              LineZ* lineZ = &sm->linesZ[i];
87571ed3c9382fa816a84731e9a2ddb9eda7d5624a6philippe              if (lineZ->dict[0] == SVal_INVALID) {
87671ed3c9382fa816a84731e9a2ddb9eda7d5624a6philippe                 VG_(freeEltPA)( LineF_pool_allocator, LineF_Ptr(lineZ) );
87771ed3c9382fa816a84731e9a2ddb9eda7d5624a6philippe                 n_linesF--;
87871ed3c9382fa816a84731e9a2ddb9eda7d5624a6philippe              }
87971ed3c9382fa816a84731e9a2ddb9eda7d5624a6philippe           }
880f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe        }
881f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe        if (!VG_(delFromFM)(map_shmem, &fm_gaKey, (UWord*)&fm_sm, gaKey))
882f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe          tl_assert (0);
883f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe        stats__secmaps_in_map_shmem--;
884f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe        tl_assert (gaKey == fm_gaKey);
885f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe        tl_assert (sm == fm_sm);
886f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe        stats__secmaps_scanGCed++;
887f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe        push_SecMap_on_freelist (sm);
888f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe        VG_(initIterAtFM) (map_shmem, gaKey + N_SECMAP_ARANGE);
889f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe      }
890f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe   }
891f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe   VG_(doneIterFM)( map_shmem );
892f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe
893f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe   if (really) {
894f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe      stats__secmaps_scanGC++;
895f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe      /* Next GC when we approach the max allocated */
896f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe      next_SecMap_GC_at = stats__secmaps_allocd - 1000;
897f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe      /* Unless we GCed less than 10%. We then allow to alloc 10%
898f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe         more before GCing. This avoids doing a lot of costly GC
899f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe         for the worst case : the 'growing phase' of an application
900f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe         that allocates a lot of memory.
901f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe         Worst can can be reproduced e.g. by
902f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe             perf/memrw -t 30000000 -b 1000 -r 1 -l 1
903f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe         that allocates around 30Gb of memory. */
904f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe      if (ok_GCed < stats__secmaps_allocd/10)
905f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe         next_SecMap_GC_at = stats__secmaps_allocd + stats__secmaps_allocd/10;
906f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe
907f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe   }
908f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe
909f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe   if (VG_(clo_stats) && really) {
910f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe      VG_(message)(Vg_DebugMsg,
911f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe                  "libhb: SecMap GC: #%lu scanned %lu, GCed %lu,"
912f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe                   " next GC at %lu\n",
913f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe                   stats__secmaps_scanGC, examined, ok_GCed,
914f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe                   next_SecMap_GC_at);
915f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe   }
916f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe
917f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe   return ok_GCed;
918f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe}
919f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe
920f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardjstatic SecMap* shmem__find_or_alloc_SecMap ( Addr ga )
921f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj{
922f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   SecMap* sm = shmem__find_SecMap ( ga );
923f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   if (LIKELY(sm)) {
924f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe      if (CHECK_ZSM) tl_assert(is_sane_SecMap(sm));
925f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      return sm;
926f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   } else {
927f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      /* create a new one */
928f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      Addr gaKey = shmem__round_to_SecMap_base(ga);
929f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe      sm = shmem__alloc_or_recycle_SecMap();
930f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      tl_assert(sm);
931f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      VG_(addToFM)( map_shmem, (UWord)gaKey, (UWord)sm );
932f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe      stats__secmaps_in_map_shmem++;
933f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe      if (CHECK_ZSM) tl_assert(is_sane_SecMap(sm));
934f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      return sm;
935f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   }
936f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj}
937f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
9380fb30ace7cd9e63623c4ed5dd720f18b0ea9dee3philippe/* Returns the nr of linesF which are in use. Note: this is scanning
9390fb30ace7cd9e63623c4ed5dd720f18b0ea9dee3philippe   the secmap wordFM. So, this is to be used for statistics only. */
9400fb30ace7cd9e63623c4ed5dd720f18b0ea9dee3philippe__attribute__((noinline))
9410fb30ace7cd9e63623c4ed5dd720f18b0ea9dee3philippestatic UWord shmem__SecMap_used_linesF(void)
9420fb30ace7cd9e63623c4ed5dd720f18b0ea9dee3philippe{
9430fb30ace7cd9e63623c4ed5dd720f18b0ea9dee3philippe   UWord secmapW = 0;
9440fb30ace7cd9e63623c4ed5dd720f18b0ea9dee3philippe   Addr  gaKey;
9450fb30ace7cd9e63623c4ed5dd720f18b0ea9dee3philippe   UWord inUse = 0;
9460fb30ace7cd9e63623c4ed5dd720f18b0ea9dee3philippe
9470fb30ace7cd9e63623c4ed5dd720f18b0ea9dee3philippe   VG_(initIterFM)( map_shmem );
9480fb30ace7cd9e63623c4ed5dd720f18b0ea9dee3philippe   while (VG_(nextIterFM)( map_shmem, &gaKey, &secmapW )) {
9490fb30ace7cd9e63623c4ed5dd720f18b0ea9dee3philippe      UWord   i;
9500fb30ace7cd9e63623c4ed5dd720f18b0ea9dee3philippe      SecMap* sm = (SecMap*)secmapW;
9510fb30ace7cd9e63623c4ed5dd720f18b0ea9dee3philippe      tl_assert(sm->magic == SecMap_MAGIC);
9520fb30ace7cd9e63623c4ed5dd720f18b0ea9dee3philippe
95371ed3c9382fa816a84731e9a2ddb9eda7d5624a6philippe      for (i = 0; i < N_SECMAP_ZLINES; i++) {
95471ed3c9382fa816a84731e9a2ddb9eda7d5624a6philippe         LineZ* lineZ = &sm->linesZ[i];
95571ed3c9382fa816a84731e9a2ddb9eda7d5624a6philippe         if (lineZ->dict[0] == SVal_INVALID)
9560fb30ace7cd9e63623c4ed5dd720f18b0ea9dee3philippe            inUse++;
9570fb30ace7cd9e63623c4ed5dd720f18b0ea9dee3philippe      }
9580fb30ace7cd9e63623c4ed5dd720f18b0ea9dee3philippe   }
9590fb30ace7cd9e63623c4ed5dd720f18b0ea9dee3philippe   VG_(doneIterFM)( map_shmem );
9600fb30ace7cd9e63623c4ed5dd720f18b0ea9dee3philippe
9610fb30ace7cd9e63623c4ed5dd720f18b0ea9dee3philippe   return inUse;
9620fb30ace7cd9e63623c4ed5dd720f18b0ea9dee3philippe}
963f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
964f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj/* ------------ LineF and LineZ related ------------ */
965f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
966f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardjstatic void rcinc_LineF ( LineF* lineF ) {
967f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   UWord i;
968f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   for (i = 0; i < N_LINE_ARANGE; i++)
9691475a7ffc4f60e78d402ba2572f1624cebbbb76aphilippe      SVal__rcinc(lineF->w64s[i]);
970f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj}
971f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
972f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardjstatic void rcdec_LineF ( LineF* lineF ) {
973f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   UWord i;
974f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   for (i = 0; i < N_LINE_ARANGE; i++)
9751475a7ffc4f60e78d402ba2572f1624cebbbb76aphilippe      SVal__rcdec(lineF->w64s[i]);
976f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj}
977f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
978f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardjstatic void rcinc_LineZ ( LineZ* lineZ ) {
979f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   tl_assert(lineZ->dict[0] != SVal_INVALID);
9801475a7ffc4f60e78d402ba2572f1624cebbbb76aphilippe   SVal__rcinc(lineZ->dict[0]);
9811475a7ffc4f60e78d402ba2572f1624cebbbb76aphilippe   if (lineZ->dict[1] != SVal_INVALID) SVal__rcinc(lineZ->dict[1]);
9821475a7ffc4f60e78d402ba2572f1624cebbbb76aphilippe   if (lineZ->dict[2] != SVal_INVALID) SVal__rcinc(lineZ->dict[2]);
9831475a7ffc4f60e78d402ba2572f1624cebbbb76aphilippe   if (lineZ->dict[3] != SVal_INVALID) SVal__rcinc(lineZ->dict[3]);
984f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj}
985f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
986f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardjstatic void rcdec_LineZ ( LineZ* lineZ ) {
987f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   tl_assert(lineZ->dict[0] != SVal_INVALID);
9881475a7ffc4f60e78d402ba2572f1624cebbbb76aphilippe   SVal__rcdec(lineZ->dict[0]);
9891475a7ffc4f60e78d402ba2572f1624cebbbb76aphilippe   if (lineZ->dict[1] != SVal_INVALID) SVal__rcdec(lineZ->dict[1]);
9901475a7ffc4f60e78d402ba2572f1624cebbbb76aphilippe   if (lineZ->dict[2] != SVal_INVALID) SVal__rcdec(lineZ->dict[2]);
9911475a7ffc4f60e78d402ba2572f1624cebbbb76aphilippe   if (lineZ->dict[3] != SVal_INVALID) SVal__rcdec(lineZ->dict[3]);
992f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj}
993f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
994f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardjinline
995f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardjstatic void write_twobit_array ( UChar* arr, UWord ix, UWord b2 ) {
996f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   Word bix, shft, mask, prep;
997f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   tl_assert(ix >= 0);
998f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   bix  = ix >> 2;
999f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   shft = 2 * (ix & 3); /* 0, 2, 4 or 6 */
1000f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   mask = 3 << shft;
1001f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   prep = b2 << shft;
1002f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   arr[bix] = (arr[bix] & ~mask) | prep;
1003f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj}
1004f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
1005f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardjinline
1006f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardjstatic UWord read_twobit_array ( UChar* arr, UWord ix ) {
1007f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   Word bix, shft;
1008f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   tl_assert(ix >= 0);
1009f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   bix  = ix >> 2;
1010f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   shft = 2 * (ix & 3); /* 0, 2, 4 or 6 */
1011f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   return (arr[bix] >> shft) & 3;
1012f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj}
1013f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
10147ab06467ab27ed51b829a724e5c40cb0dc19d88dphilippe/* We cache one free lineF, to avoid pool allocator calls.
10157ab06467ab27ed51b829a724e5c40cb0dc19d88dphilippe   Measurement on firefox has shown that this avoids more than 90%
10167ab06467ab27ed51b829a724e5c40cb0dc19d88dphilippe   of the PA calls. */
10177ab06467ab27ed51b829a724e5c40cb0dc19d88dphilippestatic LineF *free_lineF = NULL;
10187ab06467ab27ed51b829a724e5c40cb0dc19d88dphilippe
101971ed3c9382fa816a84731e9a2ddb9eda7d5624a6philippe/* Allocates a lineF for LineZ. Sets lineZ in a state indicating
102071ed3c9382fa816a84731e9a2ddb9eda7d5624a6philippe   lineF has to be used. */
102171ed3c9382fa816a84731e9a2ddb9eda7d5624a6philippestatic inline LineF *alloc_LineF_for_Z (LineZ *lineZ)
102271ed3c9382fa816a84731e9a2ddb9eda7d5624a6philippe{
102371ed3c9382fa816a84731e9a2ddb9eda7d5624a6philippe   LineF *lineF;
102471ed3c9382fa816a84731e9a2ddb9eda7d5624a6philippe
102571ed3c9382fa816a84731e9a2ddb9eda7d5624a6philippe   tl_assert(lineZ->dict[0] == SVal_INVALID);
102671ed3c9382fa816a84731e9a2ddb9eda7d5624a6philippe
10277ab06467ab27ed51b829a724e5c40cb0dc19d88dphilippe   if (LIKELY(free_lineF)) {
10287ab06467ab27ed51b829a724e5c40cb0dc19d88dphilippe      lineF = free_lineF;
10297ab06467ab27ed51b829a724e5c40cb0dc19d88dphilippe      free_lineF = NULL;
10307ab06467ab27ed51b829a724e5c40cb0dc19d88dphilippe   } else {
10317ab06467ab27ed51b829a724e5c40cb0dc19d88dphilippe      lineF = VG_(allocEltPA) ( LineF_pool_allocator );
10327ab06467ab27ed51b829a724e5c40cb0dc19d88dphilippe   }
103371ed3c9382fa816a84731e9a2ddb9eda7d5624a6philippe   lineZ->dict[0] = lineZ->dict[2] = lineZ->dict[3] = SVal_INVALID;
103471ed3c9382fa816a84731e9a2ddb9eda7d5624a6philippe   lineZ->dict[1] = Ptr2SVal (lineF);
103571ed3c9382fa816a84731e9a2ddb9eda7d5624a6philippe
103671ed3c9382fa816a84731e9a2ddb9eda7d5624a6philippe   return lineF;
103771ed3c9382fa816a84731e9a2ddb9eda7d5624a6philippe}
103871ed3c9382fa816a84731e9a2ddb9eda7d5624a6philippe
103971ed3c9382fa816a84731e9a2ddb9eda7d5624a6philippe/* rcdec the LineF of lineZ, frees the lineF, and sets lineZ
104071ed3c9382fa816a84731e9a2ddb9eda7d5624a6philippe   back to its initial state SVal_NOACCESS (i.e. ready to be
104171ed3c9382fa816a84731e9a2ddb9eda7d5624a6philippe   read or written just after SecMap allocation). */
104271ed3c9382fa816a84731e9a2ddb9eda7d5624a6philippestatic inline void clear_LineF_of_Z (LineZ *lineZ)
104371ed3c9382fa816a84731e9a2ddb9eda7d5624a6philippe{
104471ed3c9382fa816a84731e9a2ddb9eda7d5624a6philippe   LineF *lineF = LineF_Ptr(lineZ);
104571ed3c9382fa816a84731e9a2ddb9eda7d5624a6philippe
104671ed3c9382fa816a84731e9a2ddb9eda7d5624a6philippe   rcdec_LineF(lineF);
10477ab06467ab27ed51b829a724e5c40cb0dc19d88dphilippe   if (UNLIKELY(free_lineF)) {
10487ab06467ab27ed51b829a724e5c40cb0dc19d88dphilippe      VG_(freeEltPA)( LineF_pool_allocator, lineF );
10497ab06467ab27ed51b829a724e5c40cb0dc19d88dphilippe   } else {
10507ab06467ab27ed51b829a724e5c40cb0dc19d88dphilippe      free_lineF = lineF;
10517ab06467ab27ed51b829a724e5c40cb0dc19d88dphilippe   }
105271ed3c9382fa816a84731e9a2ddb9eda7d5624a6philippe   lineZ->dict[0] = SVal_NOACCESS;
105371ed3c9382fa816a84731e9a2ddb9eda7d5624a6philippe   lineZ->dict[1] = SVal_INVALID;
105471ed3c9382fa816a84731e9a2ddb9eda7d5624a6philippe}
105571ed3c9382fa816a84731e9a2ddb9eda7d5624a6philippe
1056f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj/* Given address 'tag', find either the Z or F line containing relevant
1057f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   data, so it can be read into the cache.
1058f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj*/
1059f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardjstatic void find_ZF_for_reading ( /*OUT*/LineZ** zp,
1060f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj                                  /*OUT*/LineF** fp, Addr tag ) {
1061f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   LineZ* lineZ;
1062f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   LineF* lineF;
1063f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   UWord   zix;
1064f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   SecMap* sm    = shmem__find_or_alloc_SecMap(tag);
1065f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   UWord   smoff = shmem__get_SecMap_offset(tag);
1066f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   /* since smoff is derived from a valid tag, it should be
1067f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      cacheline-aligned. */
1068f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   tl_assert(0 == (smoff & (N_LINE_ARANGE - 1)));
1069f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   zix = smoff >> N_LINE_BITS;
1070f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   tl_assert(zix < N_SECMAP_ZLINES);
1071f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   lineZ = &sm->linesZ[zix];
1072f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   lineF = NULL;
1073f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   if (lineZ->dict[0] == SVal_INVALID) {
107471ed3c9382fa816a84731e9a2ddb9eda7d5624a6philippe      lineF = LineF_Ptr (lineZ);
1075f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      lineZ = NULL;
1076f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   }
1077f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   *zp = lineZ;
1078f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   *fp = lineF;
1079f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj}
1080f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
1081f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj/* Given address 'tag', return the relevant SecMap and the index of
1082f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   the LineZ within it, in the expectation that the line is to be
1083f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   overwritten.  Regardless of whether 'tag' is currently associated
1084f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   with a Z or F representation, to rcdec on the current
1085f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   representation, in recognition of the fact that the contents are
1086f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   just about to be overwritten. */
1087f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardjstatic __attribute__((noinline))
1088f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardjvoid find_Z_for_writing ( /*OUT*/SecMap** smp,
1089f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj                          /*OUT*/Word* zixp,
1090f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj                          Addr tag ) {
1091f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   LineZ* lineZ;
1092f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   UWord   zix;
1093f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   SecMap* sm    = shmem__find_or_alloc_SecMap(tag);
1094f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   UWord   smoff = shmem__get_SecMap_offset(tag);
1095f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   /* since smoff is derived from a valid tag, it should be
1096f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      cacheline-aligned. */
1097f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   tl_assert(0 == (smoff & (N_LINE_ARANGE - 1)));
1098f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   zix = smoff >> N_LINE_BITS;
1099f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   tl_assert(zix < N_SECMAP_ZLINES);
1100f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   lineZ = &sm->linesZ[zix];
110171ed3c9382fa816a84731e9a2ddb9eda7d5624a6philippe   /* re RCs, we are rcdec_LineZ/clear_LineF_of_Z this LineZ so that new data
110271ed3c9382fa816a84731e9a2ddb9eda7d5624a6philippe      can be parked in it.  Hence have to rcdec it accordingly. */
1103f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   /* If lineZ has an associated lineF, free it up. */
110471ed3c9382fa816a84731e9a2ddb9eda7d5624a6philippe   if (lineZ->dict[0] == SVal_INVALID)
110571ed3c9382fa816a84731e9a2ddb9eda7d5624a6philippe      clear_LineF_of_Z(lineZ);
110671ed3c9382fa816a84731e9a2ddb9eda7d5624a6philippe   else
1107f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      rcdec_LineZ(lineZ);
1108f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   *smp  = sm;
1109f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   *zixp = zix;
1110f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj}
1111f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
1112f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj/* ------------ CacheLine and implicit-tree related ------------ */
1113f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
1114f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj__attribute__((unused))
1115f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardjstatic void pp_CacheLine ( CacheLine* cl ) {
1116f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   Word i;
1117f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   if (!cl) {
1118f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      VG_(printf)("%s","pp_CacheLine(NULL)\n");
1119f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      return;
1120f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   }
1121f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   for (i = 0; i < N_LINE_TREES; i++)
1122f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      VG_(printf)("   descr: %04lx\n", (UWord)cl->descrs[i]);
1123f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   for (i = 0; i < N_LINE_ARANGE; i++)
1124f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      VG_(printf)("    sval: %08lx\n", (UWord)cl->svals[i]);
1125f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj}
1126f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
1127f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardjstatic UChar descr_to_validbits ( UShort descr )
1128f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj{
1129f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   /* a.k.a Party Time for gcc's constant folder */
1130f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj#  define DESCR(b8_7, b8_6, b8_5, b8_4, b8_3, b8_2, b8_1, b8_0, \
1131f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj                b16_3, b32_1, b16_2, b64, b16_1, b32_0, b16_0)  \
1132f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj             ( (UShort) ( ( (b8_7)  << 14) | ( (b8_6)  << 13) | \
1133f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj                          ( (b8_5)  << 12) | ( (b8_4)  << 11) | \
1134f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj                          ( (b8_3)  << 10) | ( (b8_2)  << 9)  | \
1135f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj                          ( (b8_1)  << 8)  | ( (b8_0)  << 7)  | \
1136f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj                          ( (b16_3) << 6)  | ( (b32_1) << 5)  | \
1137f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj                          ( (b16_2) << 4)  | ( (b64)   << 3)  | \
1138f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj                          ( (b16_1) << 2)  | ( (b32_0) << 1)  | \
1139f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj                          ( (b16_0) << 0) ) )
1140f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
1141f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj#  define BYTE(bit7, bit6, bit5, bit4, bit3, bit2, bit1, bit0) \
1142f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj             ( (UChar) ( ( (bit7) << 7) | ( (bit6) << 6) | \
1143f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj                         ( (bit5) << 5) | ( (bit4) << 4) | \
1144f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj                         ( (bit3) << 3) | ( (bit2) << 2) | \
1145f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj                         ( (bit1) << 1) | ( (bit0) << 0) ) )
1146f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
1147f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   /* these should all get folded out at compile time */
1148f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   tl_assert(DESCR(1,0,0,0,0,0,0,0, 0,0,0, 0, 0,0,0) == TREE_DESCR_8_7);
1149f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   tl_assert(DESCR(0,0,0,0,0,0,0,1, 0,0,0, 0, 0,0,0) == TREE_DESCR_8_0);
1150f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   tl_assert(DESCR(0,0,0,0,0,0,0,0, 1,0,0, 0, 0,0,0) == TREE_DESCR_16_3);
1151f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,1,0, 0, 0,0,0) == TREE_DESCR_32_1);
1152f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,0,1, 0, 0,0,0) == TREE_DESCR_16_2);
1153f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,0,0, 1, 0,0,0) == TREE_DESCR_64);
1154f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,0,0, 0, 1,0,0) == TREE_DESCR_16_1);
1155f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,0,0, 0, 0,1,0) == TREE_DESCR_32_0);
1156f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,0,0, 0, 0,0,1) == TREE_DESCR_16_0);
1157f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
1158f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   switch (descr) {
1159f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   /*
1160f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj              +--------------------------------- TREE_DESCR_8_7
1161f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj              |             +------------------- TREE_DESCR_8_0
1162f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj              |             |  +---------------- TREE_DESCR_16_3
1163f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj              |             |  | +-------------- TREE_DESCR_32_1
1164f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj              |             |  | | +------------ TREE_DESCR_16_2
1165f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj              |             |  | | |  +--------- TREE_DESCR_64
1166f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj              |             |  | | |  |  +------ TREE_DESCR_16_1
1167f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj              |             |  | | |  |  | +---- TREE_DESCR_32_0
1168f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj              |             |  | | |  |  | | +-- TREE_DESCR_16_0
1169f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj              |             |  | | |  |  | | |
1170f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj              |             |  | | |  |  | | |   GRANULARITY, 7 -> 0 */
1171f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   case DESCR(1,1,1,1,1,1,1,1, 0,0,0, 0, 0,0,0): /* 8 8 8 8  8 8 8 8 */
1172f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj                                                 return BYTE(1,1,1,1,1,1,1,1);
1173f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   case DESCR(1,1,0,0,1,1,1,1, 0,0,1, 0, 0,0,0): /* 8 8 16   8 8 8 8 */
1174f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj                                                 return BYTE(1,1,0,1,1,1,1,1);
1175f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   case DESCR(0,0,1,1,1,1,1,1, 1,0,0, 0, 0,0,0): /* 16  8 8  8 8 8 8 */
1176f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj                                                 return BYTE(0,1,1,1,1,1,1,1);
1177f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   case DESCR(0,0,0,0,1,1,1,1, 1,0,1, 0, 0,0,0): /* 16  16   8 8 8 8 */
1178f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj                                                 return BYTE(0,1,0,1,1,1,1,1);
1179f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
1180f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   case DESCR(1,1,1,1,1,1,0,0, 0,0,0, 0, 0,0,1): /* 8 8 8 8  8 8 16 */
1181f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj                                                 return BYTE(1,1,1,1,1,1,0,1);
1182f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   case DESCR(1,1,0,0,1,1,0,0, 0,0,1, 0, 0,0,1): /* 8 8 16   8 8 16 */
1183f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj                                                 return BYTE(1,1,0,1,1,1,0,1);
1184f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   case DESCR(0,0,1,1,1,1,0,0, 1,0,0, 0, 0,0,1): /* 16  8 8  8 8 16 */
1185f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj                                                 return BYTE(0,1,1,1,1,1,0,1);
1186f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   case DESCR(0,0,0,0,1,1,0,0, 1,0,1, 0, 0,0,1): /* 16  16   8 8 16 */
1187f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj                                                 return BYTE(0,1,0,1,1,1,0,1);
1188f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
1189f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   case DESCR(1,1,1,1,0,0,1,1, 0,0,0, 0, 1,0,0): /* 8 8 8 8  16 8 8 */
1190f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj                                                 return BYTE(1,1,1,1,0,1,1,1);
1191f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   case DESCR(1,1,0,0,0,0,1,1, 0,0,1, 0, 1,0,0): /* 8 8 16   16 8 8 */
1192f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj                                                 return BYTE(1,1,0,1,0,1,1,1);
1193f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   case DESCR(0,0,1,1,0,0,1,1, 1,0,0, 0, 1,0,0): /* 16  8 8  16 8 8 */
1194f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj                                                 return BYTE(0,1,1,1,0,1,1,1);
1195f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   case DESCR(0,0,0,0,0,0,1,1, 1,0,1, 0, 1,0,0): /* 16  16   16 8 8 */
1196f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj                                                 return BYTE(0,1,0,1,0,1,1,1);
1197f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
1198f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   case DESCR(1,1,1,1,0,0,0,0, 0,0,0, 0, 1,0,1): /* 8 8 8 8  16 16 */
1199f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj                                                 return BYTE(1,1,1,1,0,1,0,1);
1200f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   case DESCR(1,1,0,0,0,0,0,0, 0,0,1, 0, 1,0,1): /* 8 8 16   16 16 */
1201f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj                                                 return BYTE(1,1,0,1,0,1,0,1);
1202f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   case DESCR(0,0,1,1,0,0,0,0, 1,0,0, 0, 1,0,1): /* 16  8 8  16 16 */
1203f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj                                                 return BYTE(0,1,1,1,0,1,0,1);
1204f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   case DESCR(0,0,0,0,0,0,0,0, 1,0,1, 0, 1,0,1): /* 16  16   16 16 */
1205f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj                                                 return BYTE(0,1,0,1,0,1,0,1);
1206f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
1207f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   case DESCR(0,0,0,0,1,1,1,1, 0,1,0, 0, 0,0,0): /* 32  8 8 8 8 */
1208f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj                                                 return BYTE(0,0,0,1,1,1,1,1);
1209f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   case DESCR(0,0,0,0,1,1,0,0, 0,1,0, 0, 0,0,1): /* 32  8 8 16  */
1210f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj                                                 return BYTE(0,0,0,1,1,1,0,1);
1211f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   case DESCR(0,0,0,0,0,0,1,1, 0,1,0, 0, 1,0,0): /* 32  16  8 8 */
1212f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj                                                 return BYTE(0,0,0,1,0,1,1,1);
1213f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   case DESCR(0,0,0,0,0,0,0,0, 0,1,0, 0, 1,0,1): /* 32  16  16  */
1214f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj                                                 return BYTE(0,0,0,1,0,1,0,1);
1215f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
1216f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   case DESCR(1,1,1,1,0,0,0,0, 0,0,0, 0, 0,1,0): /* 8 8 8 8  32 */
1217f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj                                                 return BYTE(1,1,1,1,0,0,0,1);
1218f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   case DESCR(1,1,0,0,0,0,0,0, 0,0,1, 0, 0,1,0): /* 8 8 16   32 */
1219f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj                                                 return BYTE(1,1,0,1,0,0,0,1);
1220f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   case DESCR(0,0,1,1,0,0,0,0, 1,0,0, 0, 0,1,0): /* 16  8 8  32 */
1221f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj                                                 return BYTE(0,1,1,1,0,0,0,1);
1222f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   case DESCR(0,0,0,0,0,0,0,0, 1,0,1, 0, 0,1,0): /* 16  16   32 */
1223f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj                                                 return BYTE(0,1,0,1,0,0,0,1);
1224f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
1225f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   case DESCR(0,0,0,0,0,0,0,0, 0,1,0, 0, 0,1,0): /* 32 32 */
1226f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj                                                 return BYTE(0,0,0,1,0,0,0,1);
1227f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
1228f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   case DESCR(0,0,0,0,0,0,0,0, 0,0,0, 1, 0,0,0): /* 64 */
1229f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj                                                 return BYTE(0,0,0,0,0,0,0,1);
1230f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
1231f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   default: return BYTE(0,0,0,0,0,0,0,0);
1232f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj                   /* INVALID - any valid descr produces at least one
1233f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj                      valid bit in tree[0..7]*/
1234f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   }
1235f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   /* NOTREACHED*/
1236f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   tl_assert(0);
1237f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
1238f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj#  undef DESCR
1239f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj#  undef BYTE
1240f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj}
1241f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
1242f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj__attribute__((unused))
1243f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardjstatic Bool is_sane_Descr ( UShort descr ) {
1244f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   return descr_to_validbits(descr) != 0;
1245f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj}
1246f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
1247f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardjstatic void sprintf_Descr ( /*OUT*/HChar* dst, UShort descr ) {
1248f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   VG_(sprintf)(dst,
1249f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj                "%d%d%d%d%d%d%d%d %d%d%d %d %d%d%d",
1250f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj                (Int)((descr & TREE_DESCR_8_7) ? 1 : 0),
1251f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj                (Int)((descr & TREE_DESCR_8_6) ? 1 : 0),
1252f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj                (Int)((descr & TREE_DESCR_8_5) ? 1 : 0),
1253f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj                (Int)((descr & TREE_DESCR_8_4) ? 1 : 0),
1254f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj                (Int)((descr & TREE_DESCR_8_3) ? 1 : 0),
1255f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj                (Int)((descr & TREE_DESCR_8_2) ? 1 : 0),
1256f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj                (Int)((descr & TREE_DESCR_8_1) ? 1 : 0),
1257f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj                (Int)((descr & TREE_DESCR_8_0) ? 1 : 0),
1258f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj                (Int)((descr & TREE_DESCR_16_3) ? 1 : 0),
1259f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj                (Int)((descr & TREE_DESCR_32_1) ? 1 : 0),
1260f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj                (Int)((descr & TREE_DESCR_16_2) ? 1 : 0),
1261f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj                (Int)((descr & TREE_DESCR_64)   ? 1 : 0),
1262f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj                (Int)((descr & TREE_DESCR_16_1) ? 1 : 0),
1263f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj                (Int)((descr & TREE_DESCR_32_0) ? 1 : 0),
1264f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj                (Int)((descr & TREE_DESCR_16_0) ? 1 : 0)
1265f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   );
1266f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj}
1267f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardjstatic void sprintf_Byte ( /*OUT*/HChar* dst, UChar byte ) {
1268f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   VG_(sprintf)(dst, "%d%d%d%d%d%d%d%d",
1269f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj                     (Int)((byte & 128) ? 1 : 0),
1270f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj                     (Int)((byte &  64) ? 1 : 0),
1271f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj                     (Int)((byte &  32) ? 1 : 0),
1272f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj                     (Int)((byte &  16) ? 1 : 0),
1273f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj                     (Int)((byte &   8) ? 1 : 0),
1274f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj                     (Int)((byte &   4) ? 1 : 0),
1275f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj                     (Int)((byte &   2) ? 1 : 0),
1276f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj                     (Int)((byte &   1) ? 1 : 0)
1277f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   );
1278f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj}
1279f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
1280f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardjstatic Bool is_sane_Descr_and_Tree ( UShort descr, SVal* tree ) {
1281f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   Word  i;
1282f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   UChar validbits = descr_to_validbits(descr);
12837b7d59405204f88cb944155d6bc5114025ebda98florian   HChar buf[128], buf2[128];    // large enough
1284f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   if (validbits == 0)
1285f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      goto bad;
1286f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   for (i = 0; i < 8; i++) {
1287f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      if (validbits & (1<<i)) {
1288f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj         if (tree[i] == SVal_INVALID)
1289f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj            goto bad;
1290f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      } else {
1291f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj         if (tree[i] != SVal_INVALID)
1292f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj            goto bad;
1293f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      }
1294f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   }
1295f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   return True;
1296f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj  bad:
1297f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   sprintf_Descr( buf, descr );
1298f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   sprintf_Byte( buf2, validbits );
1299f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   VG_(printf)("%s","is_sane_Descr_and_Tree: bad tree {\n");
1300f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   VG_(printf)("   validbits 0x%02lx    %s\n", (UWord)validbits, buf2);
1301f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   VG_(printf)("       descr 0x%04lx  %s\n", (UWord)descr, buf);
1302f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   for (i = 0; i < 8; i++)
1303f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      VG_(printf)("   [%ld] 0x%016llx\n", i, tree[i]);
1304f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   VG_(printf)("%s","}\n");
1305f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   return 0;
1306f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj}
1307f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
1308f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardjstatic Bool is_sane_CacheLine ( CacheLine* cl )
1309f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj{
1310f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   Word tno, cloff;
1311f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
1312f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   if (!cl) goto bad;
1313f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
1314f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   for (tno = 0, cloff = 0;  tno < N_LINE_TREES;  tno++, cloff += 8) {
1315f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      UShort descr = cl->descrs[tno];
1316f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      SVal*  tree  = &cl->svals[cloff];
1317f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      if (!is_sane_Descr_and_Tree(descr, tree))
1318f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj         goto bad;
1319f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   }
1320f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   tl_assert(cloff == N_LINE_ARANGE);
1321f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   return True;
1322f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj  bad:
1323f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   pp_CacheLine(cl);
1324f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   return False;
1325f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj}
1326f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
1327f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardjstatic UShort normalise_tree ( /*MOD*/SVal* tree )
1328f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj{
1329f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   UShort descr;
1330f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   /* pre: incoming tree[0..7] does not have any invalid shvals, in
1331f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      particular no zeroes. */
13321475a7ffc4f60e78d402ba2572f1624cebbbb76aphilippe   if (CHECK_ZSM
13331475a7ffc4f60e78d402ba2572f1624cebbbb76aphilippe       && UNLIKELY(tree[7] == SVal_INVALID || tree[6] == SVal_INVALID
13341475a7ffc4f60e78d402ba2572f1624cebbbb76aphilippe                   || tree[5] == SVal_INVALID || tree[4] == SVal_INVALID
13351475a7ffc4f60e78d402ba2572f1624cebbbb76aphilippe                   || tree[3] == SVal_INVALID || tree[2] == SVal_INVALID
13361475a7ffc4f60e78d402ba2572f1624cebbbb76aphilippe                   || tree[1] == SVal_INVALID || tree[0] == SVal_INVALID))
1337f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      tl_assert(0);
1338f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
1339f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   descr = TREE_DESCR_8_7 | TREE_DESCR_8_6 | TREE_DESCR_8_5
1340f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj           | TREE_DESCR_8_4 | TREE_DESCR_8_3 | TREE_DESCR_8_2
1341f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj           | TREE_DESCR_8_1 | TREE_DESCR_8_0;
1342f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   /* build 16-bit layer */
1343f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   if (tree[1] == tree[0]) {
1344f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      tree[1] = SVal_INVALID;
1345f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      descr &= ~(TREE_DESCR_8_1 | TREE_DESCR_8_0);
1346f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      descr |= TREE_DESCR_16_0;
1347f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   }
1348f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   if (tree[3] == tree[2]) {
1349f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      tree[3] = SVal_INVALID;
1350f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      descr &= ~(TREE_DESCR_8_3 | TREE_DESCR_8_2);
1351f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      descr |= TREE_DESCR_16_1;
1352f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   }
1353f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   if (tree[5] == tree[4]) {
1354f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      tree[5] = SVal_INVALID;
1355f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      descr &= ~(TREE_DESCR_8_5 | TREE_DESCR_8_4);
1356f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      descr |= TREE_DESCR_16_2;
1357f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   }
1358f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   if (tree[7] == tree[6]) {
1359f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      tree[7] = SVal_INVALID;
1360f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      descr &= ~(TREE_DESCR_8_7 | TREE_DESCR_8_6);
1361f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      descr |= TREE_DESCR_16_3;
1362f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   }
1363f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   /* build 32-bit layer */
1364f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   if (tree[2] == tree[0]
1365f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj       && (descr & TREE_DESCR_16_1) && (descr & TREE_DESCR_16_0)) {
1366f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      tree[2] = SVal_INVALID; /* [3,1] must already be SVal_INVALID */
1367f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      descr &= ~(TREE_DESCR_16_1 | TREE_DESCR_16_0);
1368f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      descr |= TREE_DESCR_32_0;
1369f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   }
1370f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   if (tree[6] == tree[4]
1371f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj       && (descr & TREE_DESCR_16_3) && (descr & TREE_DESCR_16_2)) {
1372f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      tree[6] = SVal_INVALID; /* [7,5] must already be SVal_INVALID */
1373f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      descr &= ~(TREE_DESCR_16_3 | TREE_DESCR_16_2);
1374f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      descr |= TREE_DESCR_32_1;
1375f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   }
1376f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   /* build 64-bit layer */
1377f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   if (tree[4] == tree[0]
1378f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj       && (descr & TREE_DESCR_32_1) && (descr & TREE_DESCR_32_0)) {
1379f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      tree[4] = SVal_INVALID; /* [7,6,5,3,2,1] must already be SVal_INVALID */
1380f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      descr &= ~(TREE_DESCR_32_1 | TREE_DESCR_32_0);
1381f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      descr |= TREE_DESCR_64;
1382f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   }
1383f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   return descr;
1384f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj}
1385f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
1386f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj/* This takes a cacheline where all the data is at the leaves
1387f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   (w8[..]) and builds a correctly normalised tree. */
1388f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardjstatic void normalise_CacheLine ( /*MOD*/CacheLine* cl )
1389f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj{
1390f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   Word tno, cloff;
1391f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   for (tno = 0, cloff = 0;  tno < N_LINE_TREES;  tno++, cloff += 8) {
1392f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      SVal* tree = &cl->svals[cloff];
1393f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      cl->descrs[tno] = normalise_tree( tree );
1394f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   }
1395f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   tl_assert(cloff == N_LINE_ARANGE);
13968f5374ee1084af8cb46ec405a1de38c5aeb9194bsewardj   if (CHECK_ZSM)
1397f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
1398f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   stats__cline_normalises++;
1399f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj}
1400f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
1401f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
1402f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardjtypedef struct { UChar count; SVal sval; } CountedSVal;
1403f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
1404f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardjstatic
1405f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardjvoid sequentialise_CacheLine ( /*OUT*/CountedSVal* dst,
1406f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj                               /*OUT*/Word* dstUsedP,
1407f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj                               Word nDst, CacheLine* src )
1408f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj{
1409f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   Word  tno, cloff, dstUsed;
1410f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
1411f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   tl_assert(nDst == N_LINE_ARANGE);
1412f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   dstUsed = 0;
1413f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
1414f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   for (tno = 0, cloff = 0;  tno < N_LINE_TREES;  tno++, cloff += 8) {
1415f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      UShort descr = src->descrs[tno];
1416f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      SVal*  tree  = &src->svals[cloff];
1417f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
1418f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      /* sequentialise the tree described by (descr,tree). */
1419f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj#     define PUT(_n,_v)                                \
1420f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj         do { dst[dstUsed  ].count = (_n);             \
1421f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj              dst[dstUsed++].sval  = (_v);             \
1422f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj         } while (0)
1423f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
1424f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      /* byte 0 */
1425f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      if (descr & TREE_DESCR_64)   PUT(8, tree[0]); else
1426f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      if (descr & TREE_DESCR_32_0) PUT(4, tree[0]); else
1427f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      if (descr & TREE_DESCR_16_0) PUT(2, tree[0]); else
1428f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      if (descr & TREE_DESCR_8_0)  PUT(1, tree[0]);
1429f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      /* byte 1 */
1430f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      if (descr & TREE_DESCR_8_1)  PUT(1, tree[1]);
1431f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      /* byte 2 */
1432f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      if (descr & TREE_DESCR_16_1) PUT(2, tree[2]); else
1433f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      if (descr & TREE_DESCR_8_2)  PUT(1, tree[2]);
1434f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      /* byte 3 */
1435f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      if (descr & TREE_DESCR_8_3)  PUT(1, tree[3]);
1436f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      /* byte 4 */
1437f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      if (descr & TREE_DESCR_32_1) PUT(4, tree[4]); else
1438f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      if (descr & TREE_DESCR_16_2) PUT(2, tree[4]); else
1439f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      if (descr & TREE_DESCR_8_4)  PUT(1, tree[4]);
1440f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      /* byte 5 */
1441f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      if (descr & TREE_DESCR_8_5)  PUT(1, tree[5]);
1442f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      /* byte 6 */
1443f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      if (descr & TREE_DESCR_16_3) PUT(2, tree[6]); else
1444f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      if (descr & TREE_DESCR_8_6)  PUT(1, tree[6]);
1445f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      /* byte 7 */
1446f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      if (descr & TREE_DESCR_8_7)  PUT(1, tree[7]);
1447f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
1448f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj#     undef PUT
1449f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      /* END sequentialise the tree described by (descr,tree). */
1450f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
1451f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   }
1452f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   tl_assert(cloff == N_LINE_ARANGE);
1453f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   tl_assert(dstUsed <= nDst);
1454f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
1455f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   *dstUsedP = dstUsed;
1456f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj}
1457f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
1458f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj/* Write the cacheline 'wix' to backing store.  Where it ends up
1459f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   is determined by its tag field. */
1460f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardjstatic __attribute__((noinline)) void cacheline_wback ( UWord wix )
1461f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj{
1462f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   Word        i, j, k, m;
1463f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   Addr        tag;
1464f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   SecMap*     sm;
1465f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   CacheLine*  cl;
1466f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   LineZ* lineZ;
1467f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   LineF* lineF;
1468f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   Word        zix, fix, csvalsUsed;
1469f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   CountedSVal csvals[N_LINE_ARANGE];
1470f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   SVal        sv;
1471f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
1472f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   if (0)
1473f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   VG_(printf)("scache wback line %d\n", (Int)wix);
1474f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
1475f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   tl_assert(wix >= 0 && wix < N_WAY_NENT);
1476f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
1477f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   tag =  cache_shmem.tags0[wix];
1478f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   cl  = &cache_shmem.lyns0[wix];
1479f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
1480f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   /* The cache line may have been invalidated; if so, ignore it. */
1481f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   if (!is_valid_scache_tag(tag))
1482f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      return;
1483f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
1484f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   /* Where are we going to put it? */
1485f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   sm         = NULL;
1486f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   lineZ      = NULL;
1487f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   lineF      = NULL;
1488f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   zix = fix = -1;
1489f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
1490f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   /* find the Z line to write in and rcdec it or the associated F
1491f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      line. */
1492f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   find_Z_for_writing( &sm, &zix, tag );
1493f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
1494f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   tl_assert(sm);
1495f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   tl_assert(zix >= 0 && zix < N_SECMAP_ZLINES);
1496f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   lineZ = &sm->linesZ[zix];
1497f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
1498f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   /* Generate the data to be stored */
14998f5374ee1084af8cb46ec405a1de38c5aeb9194bsewardj   if (CHECK_ZSM)
1500f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
1501f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
1502f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   csvalsUsed = -1;
1503f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   sequentialise_CacheLine( csvals, &csvalsUsed,
1504f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj                            N_LINE_ARANGE, cl );
1505f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   tl_assert(csvalsUsed >= 1 && csvalsUsed <= N_LINE_ARANGE);
15065e5cb009574352880f1bc530e1a73ddaae5003fcflorian   if (0) VG_(printf)("%ld ", csvalsUsed);
1507f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
1508f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   lineZ->dict[0] = lineZ->dict[1]
1509f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj                  = lineZ->dict[2] = lineZ->dict[3] = SVal_INVALID;
1510f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
1511f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   /* i indexes actual shadow values, k is cursor in csvals */
1512f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   i = 0;
1513f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   for (k = 0; k < csvalsUsed; k++) {
1514f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
1515f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      sv = csvals[k].sval;
15168f5374ee1084af8cb46ec405a1de38c5aeb9194bsewardj      if (CHECK_ZSM)
1517f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj         tl_assert(csvals[k].count >= 1 && csvals[k].count <= 8);
1518f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      /* do we already have it? */
1519f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      if (sv == lineZ->dict[0]) { j = 0; goto dict_ok; }
1520f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      if (sv == lineZ->dict[1]) { j = 1; goto dict_ok; }
1521f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      if (sv == lineZ->dict[2]) { j = 2; goto dict_ok; }
1522f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      if (sv == lineZ->dict[3]) { j = 3; goto dict_ok; }
1523f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      /* no.  look for a free slot. */
15248f5374ee1084af8cb46ec405a1de38c5aeb9194bsewardj      if (CHECK_ZSM)
1525f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj         tl_assert(sv != SVal_INVALID);
1526f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      if (lineZ->dict[0]
1527f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj          == SVal_INVALID) { lineZ->dict[0] = sv; j = 0; goto dict_ok; }
1528f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      if (lineZ->dict[1]
1529f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj          == SVal_INVALID) { lineZ->dict[1] = sv; j = 1; goto dict_ok; }
1530f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      if (lineZ->dict[2]
1531f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj          == SVal_INVALID) { lineZ->dict[2] = sv; j = 2; goto dict_ok; }
1532f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      if (lineZ->dict[3]
1533f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj          == SVal_INVALID) { lineZ->dict[3] = sv; j = 3; goto dict_ok; }
1534f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      break; /* we'll have to use the f rep */
1535f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj     dict_ok:
1536f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      m = csvals[k].count;
1537f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      if (m == 8) {
1538f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj         write_twobit_array( lineZ->ix2s, i+0, j );
1539f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj         write_twobit_array( lineZ->ix2s, i+1, j );
1540f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj         write_twobit_array( lineZ->ix2s, i+2, j );
1541f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj         write_twobit_array( lineZ->ix2s, i+3, j );
1542f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj         write_twobit_array( lineZ->ix2s, i+4, j );
1543f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj         write_twobit_array( lineZ->ix2s, i+5, j );
1544f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj         write_twobit_array( lineZ->ix2s, i+6, j );
1545f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj         write_twobit_array( lineZ->ix2s, i+7, j );
1546f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj         i += 8;
1547f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      }
1548f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      else if (m == 4) {
1549f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj         write_twobit_array( lineZ->ix2s, i+0, j );
1550f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj         write_twobit_array( lineZ->ix2s, i+1, j );
1551f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj         write_twobit_array( lineZ->ix2s, i+2, j );
1552f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj         write_twobit_array( lineZ->ix2s, i+3, j );
1553f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj         i += 4;
1554f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      }
1555f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      else if (m == 1) {
1556f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj         write_twobit_array( lineZ->ix2s, i+0, j );
1557f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj         i += 1;
1558f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      }
1559f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      else if (m == 2) {
1560f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj         write_twobit_array( lineZ->ix2s, i+0, j );
1561f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj         write_twobit_array( lineZ->ix2s, i+1, j );
1562f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj         i += 2;
1563f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      }
1564f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      else {
1565f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj         tl_assert(0); /* 8 4 2 or 1 are the only legitimate values for m */
1566f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      }
1567f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
1568f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   }
1569f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
1570f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   if (LIKELY(i == N_LINE_ARANGE)) {
1571f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      /* Construction of the compressed representation was
1572f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj         successful. */
1573f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      rcinc_LineZ(lineZ);
1574f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      stats__cache_Z_wbacks++;
1575f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   } else {
1576f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      /* Cannot use the compressed(z) representation.  Use the full(f)
1577f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj         rep instead. */
1578f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      tl_assert(i >= 0 && i < N_LINE_ARANGE);
1579f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      lineZ->dict[0] = lineZ->dict[2] = lineZ->dict[3] = SVal_INVALID;
158071ed3c9382fa816a84731e9a2ddb9eda7d5624a6philippe      lineF = alloc_LineF_for_Z (lineZ);
1581f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      i = 0;
1582f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      for (k = 0; k < csvalsUsed; k++) {
15838f5374ee1084af8cb46ec405a1de38c5aeb9194bsewardj         if (CHECK_ZSM)
1584f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj            tl_assert(csvals[k].count >= 1 && csvals[k].count <= 8);
1585f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj         sv = csvals[k].sval;
15868f5374ee1084af8cb46ec405a1de38c5aeb9194bsewardj         if (CHECK_ZSM)
1587f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj            tl_assert(sv != SVal_INVALID);
1588f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj         for (m = csvals[k].count; m > 0; m--) {
1589f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj            lineF->w64s[i] = sv;
1590f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj            i++;
1591f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj         }
1592f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      }
1593f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      tl_assert(i == N_LINE_ARANGE);
1594f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      rcinc_LineF(lineF);
1595f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      stats__cache_F_wbacks++;
1596f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   }
1597f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj}
1598f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
1599f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj/* Fetch the cacheline 'wix' from the backing store.  The tag
1600f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   associated with 'wix' is assumed to have already been filled in;
1601f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   hence that is used to determine where in the backing store to read
1602f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   from. */
1603f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardjstatic __attribute__((noinline)) void cacheline_fetch ( UWord wix )
1604f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj{
1605f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   Word       i;
1606f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   Addr       tag;
1607f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   CacheLine* cl;
1608f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   LineZ*     lineZ;
1609f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   LineF*     lineF;
1610f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
1611f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   if (0)
1612f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   VG_(printf)("scache fetch line %d\n", (Int)wix);
1613f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
1614f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   tl_assert(wix >= 0 && wix < N_WAY_NENT);
1615f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
1616f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   tag =  cache_shmem.tags0[wix];
1617f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   cl  = &cache_shmem.lyns0[wix];
1618f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
1619f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   /* reject nonsense requests */
1620f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   tl_assert(is_valid_scache_tag(tag));
1621f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
1622f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   lineZ = NULL;
1623f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   lineF = NULL;
1624f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   find_ZF_for_reading( &lineZ, &lineF, tag );
1625f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   tl_assert( (lineZ && !lineF) || (!lineZ && lineF) );
1626f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
1627f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   /* expand the data into the bottom layer of the tree, then get
1628f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      cacheline_normalise to build the descriptor array. */
1629f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   if (lineF) {
1630f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      for (i = 0; i < N_LINE_ARANGE; i++) {
1631f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj         cl->svals[i] = lineF->w64s[i];
1632f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      }
1633f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      stats__cache_F_fetches++;
1634f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   } else {
1635f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      for (i = 0; i < N_LINE_ARANGE; i++) {
1636f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj         UWord ix = read_twobit_array( lineZ->ix2s, i );
16371475a7ffc4f60e78d402ba2572f1624cebbbb76aphilippe         if (CHECK_ZSM) tl_assert(ix >= 0 && ix <= 3);
16381475a7ffc4f60e78d402ba2572f1624cebbbb76aphilippe         cl->svals[i] = lineZ->dict[ix];
16391475a7ffc4f60e78d402ba2572f1624cebbbb76aphilippe         if (CHECK_ZSM) tl_assert(cl->svals[i] != SVal_INVALID);
1640f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      }
1641f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      stats__cache_Z_fetches++;
1642f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   }
1643f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   normalise_CacheLine( cl );
1644f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj}
1645f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
16468939e0990ab7a7fcc81f67e7ce3573148e4e1c2cphilippe/* Invalid the cachelines corresponding to the given range, which
16478939e0990ab7a7fcc81f67e7ce3573148e4e1c2cphilippe   must start and end on a cacheline boundary. */
1648f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippestatic void shmem__invalidate_scache_range (Addr ga, SizeT szB)
1649f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe{
16508939e0990ab7a7fcc81f67e7ce3573148e4e1c2cphilippe   Word wix;
1651f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe
16528939e0990ab7a7fcc81f67e7ce3573148e4e1c2cphilippe   /* ga must be on a cacheline boundary. */
16538939e0990ab7a7fcc81f67e7ce3573148e4e1c2cphilippe   tl_assert (is_valid_scache_tag (ga));
16548939e0990ab7a7fcc81f67e7ce3573148e4e1c2cphilippe   /* szB must be a multiple of cacheline size. */
1655f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe   tl_assert (0 == (szB & (N_LINE_ARANGE - 1)));
16568939e0990ab7a7fcc81f67e7ce3573148e4e1c2cphilippe
1657f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe
1658f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe   Word ga_ix = (ga >> N_LINE_BITS) & (N_WAY_NENT - 1);
1659f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe   Word nwix = szB / N_LINE_ARANGE;
1660f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe
1661f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe   if (nwix > N_WAY_NENT)
1662f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe      nwix = N_WAY_NENT; // no need to check several times the same entry.
1663f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe
1664f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe   for (wix = 0; wix < nwix; wix++) {
1665f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe      if (address_in_range(cache_shmem.tags0[ga_ix], ga, szB))
1666f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe         cache_shmem.tags0[ga_ix] = 1/*INVALID*/;
1667f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe      ga_ix++;
1668364f0bb8262e132da1659b82571c6615d24d0c3fphilippe      if (UNLIKELY(ga_ix == N_WAY_NENT))
1669f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe         ga_ix = 0;
1670f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   }
1671f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj}
1672f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
1673f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe
1674f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardjstatic void shmem__flush_and_invalidate_scache ( void ) {
1675f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   Word wix;
1676f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   Addr tag;
1677f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   if (0) VG_(printf)("%s","scache flush and invalidate\n");
1678f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   tl_assert(!is_valid_scache_tag(1));
1679f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   for (wix = 0; wix < N_WAY_NENT; wix++) {
1680f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      tag = cache_shmem.tags0[wix];
1681f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      if (tag == 1/*INVALID*/) {
1682f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj         /* already invalid; nothing to do */
1683f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      } else {
1684f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj         tl_assert(is_valid_scache_tag(tag));
1685f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj         cacheline_wback( wix );
1686f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      }
1687f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      cache_shmem.tags0[wix] = 1/*INVALID*/;
1688f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   }
1689f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe   stats__cache_flushes_invals++;
1690f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj}
1691f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
1692f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
1693f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardjstatic inline Bool aligned16 ( Addr a ) {
1694f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   return 0 == (a & 1);
1695f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj}
1696f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardjstatic inline Bool aligned32 ( Addr a ) {
1697f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   return 0 == (a & 3);
1698f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj}
1699f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardjstatic inline Bool aligned64 ( Addr a ) {
1700f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   return 0 == (a & 7);
1701f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj}
1702f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardjstatic inline UWord get_cacheline_offset ( Addr a ) {
1703f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   return (UWord)(a & (N_LINE_ARANGE - 1));
1704f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj}
1705f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardjstatic inline Addr cacheline_ROUNDUP ( Addr a ) {
1706f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   return ROUNDUP(a, N_LINE_ARANGE);
1707f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj}
1708f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardjstatic inline Addr cacheline_ROUNDDN ( Addr a ) {
1709f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   return ROUNDDN(a, N_LINE_ARANGE);
1710f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj}
1711f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardjstatic inline UWord get_treeno ( Addr a ) {
1712f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   return get_cacheline_offset(a) >> 3;
1713f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj}
1714f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardjstatic inline UWord get_tree_offset ( Addr a ) {
1715f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   return a & 7;
1716f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj}
1717f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
1718f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardjstatic __attribute__((noinline))
1719f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj       CacheLine* get_cacheline_MISS ( Addr a ); /* fwds */
1720f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardjstatic inline CacheLine* get_cacheline ( Addr a )
1721f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj{
1722f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   /* tag is 'a' with the in-line offset masked out,
1723f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      eg a[31]..a[4] 0000 */
1724f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   Addr       tag = a & ~(N_LINE_ARANGE - 1);
1725f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   UWord      wix = (a >> N_LINE_BITS) & (N_WAY_NENT - 1);
1726f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   stats__cache_totrefs++;
1727f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   if (LIKELY(tag == cache_shmem.tags0[wix])) {
1728f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      return &cache_shmem.lyns0[wix];
1729f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   } else {
1730f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      return get_cacheline_MISS( a );
1731f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   }
1732f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj}
1733f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
1734f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardjstatic __attribute__((noinline))
1735f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj       CacheLine* get_cacheline_MISS ( Addr a )
1736f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj{
1737f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   /* tag is 'a' with the in-line offset masked out,
1738f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      eg a[31]..a[4] 0000 */
1739f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
1740f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   CacheLine* cl;
1741f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   Addr*      tag_old_p;
1742f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   Addr       tag = a & ~(N_LINE_ARANGE - 1);
1743f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   UWord      wix = (a >> N_LINE_BITS) & (N_WAY_NENT - 1);
1744f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
1745f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   tl_assert(tag != cache_shmem.tags0[wix]);
1746f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
1747f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   /* Dump the old line into the backing store. */
1748f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   stats__cache_totmisses++;
1749f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
1750f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   cl        = &cache_shmem.lyns0[wix];
1751f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   tag_old_p = &cache_shmem.tags0[wix];
1752f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
1753f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   if (is_valid_scache_tag( *tag_old_p )) {
1754f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      /* EXPENSIVE and REDUNDANT: callee does it */
17558f5374ee1084af8cb46ec405a1de38c5aeb9194bsewardj      if (CHECK_ZSM)
1756f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj         tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
1757f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      cacheline_wback( wix );
1758f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   }
1759f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   /* and reload the new one */
1760f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   *tag_old_p = tag;
1761f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   cacheline_fetch( wix );
17628f5374ee1084af8cb46ec405a1de38c5aeb9194bsewardj   if (CHECK_ZSM)
1763f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
1764f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   return cl;
1765f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj}
1766f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
1767f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardjstatic UShort pulldown_to_32 ( /*MOD*/SVal* tree, UWord toff, UShort descr ) {
1768f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   stats__cline_64to32pulldown++;
1769f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   switch (toff) {
1770f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      case 0: case 4:
1771f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj         tl_assert(descr & TREE_DESCR_64);
1772f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj         tree[4] = tree[0];
1773f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj         descr &= ~TREE_DESCR_64;
1774f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj         descr |= (TREE_DESCR_32_1 | TREE_DESCR_32_0);
1775f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj         break;
1776f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      default:
1777f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj         tl_assert(0);
1778f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   }
1779f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   return descr;
1780f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj}
1781f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
1782f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardjstatic UShort pulldown_to_16 ( /*MOD*/SVal* tree, UWord toff, UShort descr ) {
1783f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   stats__cline_32to16pulldown++;
1784f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   switch (toff) {
1785f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      case 0: case 2:
1786f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj         if (!(descr & TREE_DESCR_32_0)) {
1787f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj            descr = pulldown_to_32(tree, 0, descr);
1788f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj         }
1789f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj         tl_assert(descr & TREE_DESCR_32_0);
1790f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj         tree[2] = tree[0];
1791f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj         descr &= ~TREE_DESCR_32_0;
1792f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj         descr |= (TREE_DESCR_16_1 | TREE_DESCR_16_0);
1793f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj         break;
1794f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      case 4: case 6:
1795f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj         if (!(descr & TREE_DESCR_32_1)) {
1796f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj            descr = pulldown_to_32(tree, 4, descr);
1797f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj         }
1798f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj         tl_assert(descr & TREE_DESCR_32_1);
1799f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj         tree[6] = tree[4];
1800f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj         descr &= ~TREE_DESCR_32_1;
1801f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj         descr |= (TREE_DESCR_16_3 | TREE_DESCR_16_2);
1802f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj         break;
1803f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      default:
1804f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj         tl_assert(0);
1805f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   }
1806f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   return descr;
1807f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj}
1808f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
1809f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardjstatic UShort pulldown_to_8 ( /*MOD*/SVal* tree, UWord toff, UShort descr ) {
1810f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   stats__cline_16to8pulldown++;
1811f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   switch (toff) {
1812f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      case 0: case 1:
1813f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj         if (!(descr & TREE_DESCR_16_0)) {
1814f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj            descr = pulldown_to_16(tree, 0, descr);
1815f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj         }
1816f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj         tl_assert(descr & TREE_DESCR_16_0);
1817f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj         tree[1] = tree[0];
1818f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj         descr &= ~TREE_DESCR_16_0;
1819f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj         descr |= (TREE_DESCR_8_1 | TREE_DESCR_8_0);
1820f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj         break;
1821f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      case 2: case 3:
1822f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj         if (!(descr & TREE_DESCR_16_1)) {
1823f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj            descr = pulldown_to_16(tree, 2, descr);
1824f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj         }
1825f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj         tl_assert(descr & TREE_DESCR_16_1);
1826f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj         tree[3] = tree[2];
1827f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj         descr &= ~TREE_DESCR_16_1;
1828f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj         descr |= (TREE_DESCR_8_3 | TREE_DESCR_8_2);
1829f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj         break;
1830f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      case 4: case 5:
1831f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj         if (!(descr & TREE_DESCR_16_2)) {
1832f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj            descr = pulldown_to_16(tree, 4, descr);
1833f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj         }
1834f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj         tl_assert(descr & TREE_DESCR_16_2);
1835f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj         tree[5] = tree[4];
1836f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj         descr &= ~TREE_DESCR_16_2;
1837f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj         descr |= (TREE_DESCR_8_5 | TREE_DESCR_8_4);
1838f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj         break;
1839f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      case 6: case 7:
1840f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj         if (!(descr & TREE_DESCR_16_3)) {
1841f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj            descr = pulldown_to_16(tree, 6, descr);
1842f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj         }
1843f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj         tl_assert(descr & TREE_DESCR_16_3);
1844f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj         tree[7] = tree[6];
1845f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj         descr &= ~TREE_DESCR_16_3;
1846f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj         descr |= (TREE_DESCR_8_7 | TREE_DESCR_8_6);
1847f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj         break;
1848f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      default:
1849f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj         tl_assert(0);
1850f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   }
1851f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   return descr;
1852f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj}
1853f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
1854f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
1855f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardjstatic UShort pullup_descr_to_16 ( UShort descr, UWord toff ) {
1856f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   UShort mask;
1857f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   switch (toff) {
1858f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      case 0:
1859f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj         mask = TREE_DESCR_8_1 | TREE_DESCR_8_0;
1860f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj         tl_assert( (descr & mask) == mask );
1861f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj         descr &= ~mask;
1862f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj         descr |= TREE_DESCR_16_0;
1863f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj         break;
1864f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      case 2:
1865f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj         mask = TREE_DESCR_8_3 | TREE_DESCR_8_2;
1866f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj         tl_assert( (descr & mask) == mask );
1867f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj         descr &= ~mask;
1868f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj         descr |= TREE_DESCR_16_1;
1869f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj         break;
1870f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      case 4:
1871f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj         mask = TREE_DESCR_8_5 | TREE_DESCR_8_4;
1872f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj         tl_assert( (descr & mask) == mask );
1873f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj         descr &= ~mask;
1874f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj         descr |= TREE_DESCR_16_2;
1875f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj         break;
1876f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      case 6:
1877f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj         mask = TREE_DESCR_8_7 | TREE_DESCR_8_6;
1878f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj         tl_assert( (descr & mask) == mask );
1879f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj         descr &= ~mask;
1880f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj         descr |= TREE_DESCR_16_3;
1881f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj         break;
1882f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      default:
1883f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj         tl_assert(0);
1884f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   }
1885f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   return descr;
1886f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj}
1887f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
1888f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardjstatic UShort pullup_descr_to_32 ( UShort descr, UWord toff ) {
1889f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   UShort mask;
1890f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   switch (toff) {
1891f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      case 0:
1892f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj         if (!(descr & TREE_DESCR_16_0))
1893f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj            descr = pullup_descr_to_16(descr, 0);
1894f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj         if (!(descr & TREE_DESCR_16_1))
1895f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj            descr = pullup_descr_to_16(descr, 2);
1896f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj         mask = TREE_DESCR_16_1 | TREE_DESCR_16_0;
1897f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj         tl_assert( (descr & mask) == mask );
1898f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj         descr &= ~mask;
1899f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj         descr |= TREE_DESCR_32_0;
1900f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj         break;
1901f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      case 4:
1902f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj         if (!(descr & TREE_DESCR_16_2))
1903f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj            descr = pullup_descr_to_16(descr, 4);
1904f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj         if (!(descr & TREE_DESCR_16_3))
1905f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj            descr = pullup_descr_to_16(descr, 6);
1906f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj         mask = TREE_DESCR_16_3 | TREE_DESCR_16_2;
1907f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj         tl_assert( (descr & mask) == mask );
1908f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj         descr &= ~mask;
1909f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj         descr |= TREE_DESCR_32_1;
1910f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj         break;
1911f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      default:
1912f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj         tl_assert(0);
1913f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   }
1914f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   return descr;
1915f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj}
1916f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
1917f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardjstatic Bool valid_value_is_above_me_32 ( UShort descr, UWord toff ) {
1918f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   switch (toff) {
1919f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      case 0: case 4:
1920f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj         return 0 != (descr & TREE_DESCR_64);
1921f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      default:
1922f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj         tl_assert(0);
1923f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   }
1924f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj}
1925f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
1926f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardjstatic Bool valid_value_is_below_me_16 ( UShort descr, UWord toff ) {
1927f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   switch (toff) {
1928f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      case 0:
1929f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj         return 0 != (descr & (TREE_DESCR_8_1 | TREE_DESCR_8_0));
1930f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      case 2:
1931f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj         return 0 != (descr & (TREE_DESCR_8_3 | TREE_DESCR_8_2));
1932f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      case 4:
1933f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj         return 0 != (descr & (TREE_DESCR_8_5 | TREE_DESCR_8_4));
1934f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      case 6:
1935f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj         return 0 != (descr & (TREE_DESCR_8_7 | TREE_DESCR_8_6));
1936f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      default:
1937f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj         tl_assert(0);
1938f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   }
1939f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj}
1940f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
1941f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj/* ------------ Cache management ------------ */
1942f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
/* Flush the shadow memory cache.  This simply hands off to
   shmem__flush_and_invalidate_scache. */
static void zsm_flush_cache ( void )
{
   shmem__flush_and_invalidate_scache();
}
1947f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
1948f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
19491475a7ffc4f60e78d402ba2572f1624cebbbb76aphilippestatic void zsm_init ( void )
1950f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj{
1951f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   tl_assert( sizeof(UWord) == sizeof(Addr) );
1952f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
1953f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   tl_assert(map_shmem == NULL);
1954f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   map_shmem = VG_(newFM)( HG_(zalloc), "libhb.zsm_init.1 (map_shmem)",
1955f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj                           HG_(free),
1956f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj                           NULL/*unboxed UWord cmp*/);
1957f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe   /* Invalidate all cache entries. */
1958f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe   tl_assert(!is_valid_scache_tag(1));
1959f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe   for (UWord wix = 0; wix < N_WAY_NENT; wix++) {
1960f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe      cache_shmem.tags0[wix] = 1/*INVALID*/;
1961f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe   }
1962f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
196371ed3c9382fa816a84731e9a2ddb9eda7d5624a6philippe   LineF_pool_allocator = VG_(newPA) (
196471ed3c9382fa816a84731e9a2ddb9eda7d5624a6philippe                             sizeof(LineF),
196571ed3c9382fa816a84731e9a2ddb9eda7d5624a6philippe                             /* Nr elements/pool to fill a core arena block
196671ed3c9382fa816a84731e9a2ddb9eda7d5624a6philippe                                taking some arena overhead into account. */
196771ed3c9382fa816a84731e9a2ddb9eda7d5624a6philippe                             (4 * 1024 * 1024 - 200)/sizeof(LineF),
196871ed3c9382fa816a84731e9a2ddb9eda7d5624a6philippe                             HG_(zalloc),
196971ed3c9382fa816a84731e9a2ddb9eda7d5624a6philippe                             "libhb.LineF_storage.pool",
197071ed3c9382fa816a84731e9a2ddb9eda7d5624a6philippe                             HG_(free)
197171ed3c9382fa816a84731e9a2ddb9eda7d5624a6philippe                          );
197271ed3c9382fa816a84731e9a2ddb9eda7d5624a6philippe
1973f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   /* a SecMap must contain an integral number of CacheLines */
1974f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   tl_assert(0 == (N_SECMAP_ARANGE % N_LINE_ARANGE));
1975f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   /* also ... a CacheLine holds an integral number of trees */
1976f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   tl_assert(0 == (N_LINE_ARANGE % 8));
1977f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj}
1978f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
1979f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj/////////////////////////////////////////////////////////////////
1980f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj/////////////////////////////////////////////////////////////////
1981f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj//                                                             //
1982ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj// SECTION END compressed shadow memory                        //
1983ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj//                                                             //
1984ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj/////////////////////////////////////////////////////////////////
1985ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj/////////////////////////////////////////////////////////////////
1986e4cce74e68f351f1dd78ba7a5a412966f8d7702csewardj
19877aa38a97a2a49163e465a7c85b8b9469f5c7d8e2sewardj
19887aa38a97a2a49163e465a7c85b8b9469f5c7d8e2sewardj
1989ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj/////////////////////////////////////////////////////////////////
1990ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj/////////////////////////////////////////////////////////////////
1991ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj//                                                             //
1992ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj// SECTION BEGIN vts primitives                                //
1993ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj//                                                             //
1994ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj/////////////////////////////////////////////////////////////////
1995ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj/////////////////////////////////////////////////////////////////
1996ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj
1997ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj
1998ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj/* There's a 1-1 mapping between Thr and ThrIDs -- the latter merely
1999ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj   being compact stand-ins for Thr*'s.  Use these functions to map
2000ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj   between them. */
2001ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardjstatic ThrID Thr__to_ThrID   ( Thr*  thr   ); /* fwds */
2002ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardjstatic Thr*  Thr__from_ThrID ( ThrID thrid ); /* fwds */
2003ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj
2004e4cce74e68f351f1dd78ba7a5a412966f8d7702csewardj__attribute__((noreturn))
2005e4cce74e68f351f1dd78ba7a5a412966f8d7702csewardjstatic void scalarts_limitations_fail_NORETURN ( Bool due_to_nThrs )
2006e4cce74e68f351f1dd78ba7a5a412966f8d7702csewardj{
2007e4cce74e68f351f1dd78ba7a5a412966f8d7702csewardj   if (due_to_nThrs) {
20086bf3726ebf7a04ca48a5e6cb1ad7a3065054e54eflorian      const HChar* s =
2009e4cce74e68f351f1dd78ba7a5a412966f8d7702csewardj         "\n"
2010e4cce74e68f351f1dd78ba7a5a412966f8d7702csewardj         "Helgrind: cannot continue, run aborted: too many threads.\n"
2011e4cce74e68f351f1dd78ba7a5a412966f8d7702csewardj         "Sorry.  Helgrind can only handle programs that create\n"
2012e4cce74e68f351f1dd78ba7a5a412966f8d7702csewardj         "%'llu or fewer threads over their entire lifetime.\n"
2013e4cce74e68f351f1dd78ba7a5a412966f8d7702csewardj         "\n";
201403e7d27ffe277c8e55ae689a98aee655a722af84sewardj      VG_(umsg)(s, (ULong)(ThrID_MAX_VALID - 1024));
2015e4cce74e68f351f1dd78ba7a5a412966f8d7702csewardj   } else {
20166bf3726ebf7a04ca48a5e6cb1ad7a3065054e54eflorian      const HChar* s =
2017e4cce74e68f351f1dd78ba7a5a412966f8d7702csewardj         "\n"
2018e4cce74e68f351f1dd78ba7a5a412966f8d7702csewardj         "Helgrind: cannot continue, run aborted: too many\n"
2019e4cce74e68f351f1dd78ba7a5a412966f8d7702csewardj         "synchronisation events.  Sorry. Helgrind can only handle\n"
2020e4cce74e68f351f1dd78ba7a5a412966f8d7702csewardj         "programs which perform %'llu or fewer\n"
2021e4cce74e68f351f1dd78ba7a5a412966f8d7702csewardj         "inter-thread synchronisation events (locks, unlocks, etc).\n"
2022e4cce74e68f351f1dd78ba7a5a412966f8d7702csewardj         "\n";
2023e4cce74e68f351f1dd78ba7a5a412966f8d7702csewardj      VG_(umsg)(s, (1ULL << SCALARTS_N_TYMBITS) - 1);
2024e4cce74e68f351f1dd78ba7a5a412966f8d7702csewardj   }
2025e4cce74e68f351f1dd78ba7a5a412966f8d7702csewardj   VG_(exit)(1);
2026e4cce74e68f351f1dd78ba7a5a412966f8d7702csewardj   /*NOTREACHED*/
2027e4cce74e68f351f1dd78ba7a5a412966f8d7702csewardj   tl_assert(0); /*wtf?!*/
2028e4cce74e68f351f1dd78ba7a5a412966f8d7702csewardj}
2029e4cce74e68f351f1dd78ba7a5a412966f8d7702csewardj
2030e4cce74e68f351f1dd78ba7a5a412966f8d7702csewardj
2031c3508652c3a00c0d0035603a7d738f2fe47e9331philippe/* The dead thread (ThrID, actually) tables.  A thread may only be
2032ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj   listed here if we have been notified thereof by libhb_async_exit.
2033ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj   New entries are added at the end.  The order isn't important, but
2034c3508652c3a00c0d0035603a7d738f2fe47e9331philippe   the ThrID values must be unique.
2035c3508652c3a00c0d0035603a7d738f2fe47e9331philippe   verydead_thread_table_not_pruned lists the identity of the threads
2036c3508652c3a00c0d0035603a7d738f2fe47e9331philippe   that died since the previous round of pruning.
2037c3508652c3a00c0d0035603a7d738f2fe47e9331philippe   Once pruning is done, these ThrID are added in verydead_thread_table.
2038c3508652c3a00c0d0035603a7d738f2fe47e9331philippe   We don't actually need to keep the set of threads that have ever died --
2039ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj   only the threads that have died since the previous round of
2040ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj   pruning.  But it's useful for sanity check purposes to keep the
2041ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj   entire set, so we do. */
2042c3508652c3a00c0d0035603a7d738f2fe47e9331philippestatic XArray* /* of ThrID */ verydead_thread_table_not_pruned = NULL;
2043ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardjstatic XArray* /* of ThrID */ verydead_thread_table = NULL;
2044ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj
2045ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj/* Arbitrary total ordering on ThrIDs. */
20466bd9dc18c043927c1196caba20a327238a179c42florianstatic Int cmp__ThrID ( const void* v1, const void* v2 ) {
20476bd9dc18c043927c1196caba20a327238a179c42florian   ThrID id1 = *(const ThrID*)v1;
20486bd9dc18c043927c1196caba20a327238a179c42florian   ThrID id2 = *(const ThrID*)v2;
2049ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj   if (id1 < id2) return -1;
2050ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj   if (id1 > id2) return 1;
2051ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj   return 0;
2052ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj}
2053ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj
2054c3508652c3a00c0d0035603a7d738f2fe47e9331philippestatic void verydead_thread_tables_init ( void )
2055ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj{
2056ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj   tl_assert(!verydead_thread_table);
2057c3508652c3a00c0d0035603a7d738f2fe47e9331philippe   tl_assert(!verydead_thread_table_not_pruned);
2058ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj   verydead_thread_table
2059ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj     = VG_(newXA)( HG_(zalloc),
2060ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj                   "libhb.verydead_thread_table_init.1",
2061ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj                   HG_(free), sizeof(ThrID) );
2062ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj   VG_(setCmpFnXA)(verydead_thread_table, cmp__ThrID);
2063c3508652c3a00c0d0035603a7d738f2fe47e9331philippe   verydead_thread_table_not_pruned
2064c3508652c3a00c0d0035603a7d738f2fe47e9331philippe     = VG_(newXA)( HG_(zalloc),
2065c3508652c3a00c0d0035603a7d738f2fe47e9331philippe                   "libhb.verydead_thread_table_init.2",
2066c3508652c3a00c0d0035603a7d738f2fe47e9331philippe                   HG_(free), sizeof(ThrID) );
2067c3508652c3a00c0d0035603a7d738f2fe47e9331philippe   VG_(setCmpFnXA)(verydead_thread_table_not_pruned, cmp__ThrID);
2068ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj}
2069ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj
2070c3508652c3a00c0d0035603a7d738f2fe47e9331philippestatic void verydead_thread_table_sort_and_check (XArray* thrids)
2071c3508652c3a00c0d0035603a7d738f2fe47e9331philippe{
2072c3508652c3a00c0d0035603a7d738f2fe47e9331philippe   UWord i;
2073c3508652c3a00c0d0035603a7d738f2fe47e9331philippe
2074c3508652c3a00c0d0035603a7d738f2fe47e9331philippe   VG_(sortXA)( thrids );
2075c3508652c3a00c0d0035603a7d738f2fe47e9331philippe   /* Sanity check: check for unique .sts.thr values. */
2076c3508652c3a00c0d0035603a7d738f2fe47e9331philippe   UWord nBT = VG_(sizeXA)( thrids );
2077c3508652c3a00c0d0035603a7d738f2fe47e9331philippe   if (nBT > 0) {
2078c3508652c3a00c0d0035603a7d738f2fe47e9331philippe      ThrID thrid1, thrid2;
2079c3508652c3a00c0d0035603a7d738f2fe47e9331philippe      thrid2 = *(ThrID*)VG_(indexXA)( thrids, 0 );
2080c3508652c3a00c0d0035603a7d738f2fe47e9331philippe      for (i = 1; i < nBT; i++) {
2081c3508652c3a00c0d0035603a7d738f2fe47e9331philippe         thrid1 = thrid2;
2082c3508652c3a00c0d0035603a7d738f2fe47e9331philippe         thrid2 = *(ThrID*)VG_(indexXA)( thrids, i );
2083c3508652c3a00c0d0035603a7d738f2fe47e9331philippe         tl_assert(thrid1 < thrid2);
2084c3508652c3a00c0d0035603a7d738f2fe47e9331philippe      }
2085c3508652c3a00c0d0035603a7d738f2fe47e9331philippe   }
2086c3508652c3a00c0d0035603a7d738f2fe47e9331philippe   /* Ok, so the dead thread table thrids has unique and in-order keys. */
2087c3508652c3a00c0d0035603a7d738f2fe47e9331philippe}
2088f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
2089f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj/* A VTS contains .ts, its vector clock, and also .id, a field to hold
2090f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   a backlink for the caller's convenience.  Since we have no idea
2091f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   what to set that to in the library, it always gets set to
2092f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   VtsID_INVALID. */
2093f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardjtypedef
2094f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   struct {
20957aa38a97a2a49163e465a7c85b8b9469f5c7d8e2sewardj      VtsID    id;
20967aa38a97a2a49163e465a7c85b8b9469f5c7d8e2sewardj      UInt     usedTS;
20977aa38a97a2a49163e465a7c85b8b9469f5c7d8e2sewardj      UInt     sizeTS;
20987aa38a97a2a49163e465a7c85b8b9469f5c7d8e2sewardj      ScalarTS ts[0];
2099f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   }
2100f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   VTS;
2101f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
21027aa38a97a2a49163e465a7c85b8b9469f5c7d8e2sewardj/* Allocate a VTS capable of storing 'sizeTS' entries. */
21036bd9dc18c043927c1196caba20a327238a179c42florianstatic VTS* VTS__new ( const HChar* who, UInt sizeTS );
2104f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
2105ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj/* Make a clone of 'vts', sizing the new array to exactly match the
21067aa38a97a2a49163e465a7c85b8b9469f5c7d8e2sewardj   number of ScalarTSs present. */
21076bd9dc18c043927c1196caba20a327238a179c42florianstatic VTS* VTS__clone ( const HChar* who, VTS* vts );
2108f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
2109ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj/* Make a clone of 'vts' with the thrids in 'thridsToDel' removed.  The new
2110ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj   array is sized exactly to hold the number of required elements.
2111ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj   'thridsToDel' is an array of ThrIDs to be omitted in the clone, and
2112ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj   must be in strictly increasing order. */
21136bd9dc18c043927c1196caba20a327238a179c42florianstatic VTS* VTS__subtract ( const HChar* who, VTS* vts, XArray* thridsToDel );
2114ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj
2115f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj/* Delete this VTS in its entirety. */
211623f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardjstatic void VTS__delete ( VTS* vts );
2117f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
21187aa38a97a2a49163e465a7c85b8b9469f5c7d8e2sewardj/* Create a new singleton VTS in 'out'.  Caller must have
21197aa38a97a2a49163e465a7c85b8b9469f5c7d8e2sewardj   pre-allocated 'out' sufficiently big to hold the result in all
21207aa38a97a2a49163e465a7c85b8b9469f5c7d8e2sewardj   possible cases. */
21217aa38a97a2a49163e465a7c85b8b9469f5c7d8e2sewardjstatic void VTS__singleton ( /*OUT*/VTS* out, Thr* thr, ULong tym );
2122f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
21237aa38a97a2a49163e465a7c85b8b9469f5c7d8e2sewardj/* Create in 'out' a VTS which is the same as 'vts' except with
21247aa38a97a2a49163e465a7c85b8b9469f5c7d8e2sewardj   vts[me]++, so to speak.  Caller must have pre-allocated 'out'
21257aa38a97a2a49163e465a7c85b8b9469f5c7d8e2sewardj   sufficiently big to hold the result in all possible cases. */
21267aa38a97a2a49163e465a7c85b8b9469f5c7d8e2sewardjstatic void VTS__tick ( /*OUT*/VTS* out, Thr* me, VTS* vts );
2127f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
21287aa38a97a2a49163e465a7c85b8b9469f5c7d8e2sewardj/* Create in 'out' a VTS which is the join (max) of 'a' and
21297aa38a97a2a49163e465a7c85b8b9469f5c7d8e2sewardj   'b'. Caller must have pre-allocated 'out' sufficiently big to hold
21307aa38a97a2a49163e465a7c85b8b9469f5c7d8e2sewardj   the result in all possible cases. */
21317aa38a97a2a49163e465a7c85b8b9469f5c7d8e2sewardjstatic void VTS__join ( /*OUT*/VTS* out, VTS* a, VTS* b );
2132f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
213323f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj/* Compute the partial ordering relation of the two args.  Although we
213423f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj   could be completely general and return an enumeration value (EQ,
213523f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj   LT, GT, UN), in fact we only need LEQ, and so we may as well
213623f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj   hardwire that fact.
2137f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
2138e4cce74e68f351f1dd78ba7a5a412966f8d7702csewardj   Returns zero iff LEQ(A,B), or a valid ThrID if not (zero is an
2139e4cce74e68f351f1dd78ba7a5a412966f8d7702csewardj   invalid ThrID).  In the latter case, the returned ThrID indicates
2140e4cce74e68f351f1dd78ba7a5a412966f8d7702csewardj   the discovered point for which they are not.  There may be more
2141e4cce74e68f351f1dd78ba7a5a412966f8d7702csewardj   than one such point, but we only care about seeing one of them, not
2142e4cce74e68f351f1dd78ba7a5a412966f8d7702csewardj   all of them.  This rather strange convention is used because
2143e4cce74e68f351f1dd78ba7a5a412966f8d7702csewardj   sometimes we want to know the actual index at which they first
2144e4cce74e68f351f1dd78ba7a5a412966f8d7702csewardj   differ. */
2145e4cce74e68f351f1dd78ba7a5a412966f8d7702csewardjstatic UInt VTS__cmpLEQ ( VTS* a, VTS* b );
2146f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
2147f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj/* Compute an arbitrary structural (total) ordering on the two args,
2148f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   based on their VCs, so they can be looked up in a table, tree, etc.
2149f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   Returns -1, 0 or 1. */
215023f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardjstatic Word VTS__cmp_structural ( VTS* a, VTS* b );
2151f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
2152b28fe8941785b44e63dfedd843e8107ee5a7cf2eflorian/* Debugging only.  Display the given VTS. */
2153b28fe8941785b44e63dfedd843e8107ee5a7cf2eflorianstatic void VTS__show ( const VTS* vts );
2154f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
2155f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj/* Debugging only.  Return vts[index], so to speak. */
215623f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardjstatic ULong VTS__indexAt_SLOW ( VTS* vts, Thr* idx );
2157f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
2158ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj/* Notify the VTS machinery that a thread has been declared
2159ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj   comprehensively dead: that is, it has done an async exit AND it has
2160ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj   been joined with.  This should ensure that its local clocks (.viR
2161ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj   and .viW) will never again change, and so all mentions of this
2162ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj   thread from all VTSs in the system may be removed. */
2163ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardjstatic void VTS__declare_thread_very_dead ( Thr* idx );
2164f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
2165f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj/*--------------- to do with Vector Timestamps ---------------*/
2166f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
2167f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardjstatic Bool is_sane_VTS ( VTS* vts )
2168f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj{
2169f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   UWord     i, n;
2170f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   ScalarTS  *st1, *st2;
2171f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   if (!vts) return False;
2172555fc57fc4c8ed646139b9e7ceaeb2857a16972csewardj   if (vts->usedTS > vts->sizeTS) return False;
21737aa38a97a2a49163e465a7c85b8b9469f5c7d8e2sewardj   n = vts->usedTS;
21747aa38a97a2a49163e465a7c85b8b9469f5c7d8e2sewardj   if (n == 1) {
21757aa38a97a2a49163e465a7c85b8b9469f5c7d8e2sewardj      st1 = &vts->ts[0];
21767aa38a97a2a49163e465a7c85b8b9469f5c7d8e2sewardj      if (st1->tym == 0)
21777aa38a97a2a49163e465a7c85b8b9469f5c7d8e2sewardj         return False;
21787aa38a97a2a49163e465a7c85b8b9469f5c7d8e2sewardj   }
21797aa38a97a2a49163e465a7c85b8b9469f5c7d8e2sewardj   else
2180f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   if (n >= 2) {
2181f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      for (i = 0; i < n-1; i++) {
21827aa38a97a2a49163e465a7c85b8b9469f5c7d8e2sewardj         st1 = &vts->ts[i];
21837aa38a97a2a49163e465a7c85b8b9469f5c7d8e2sewardj         st2 = &vts->ts[i+1];
2184e4cce74e68f351f1dd78ba7a5a412966f8d7702csewardj         if (st1->thrid >= st2->thrid)
2185f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj            return False;
2186f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj         if (st1->tym == 0 || st2->tym == 0)
2187f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj            return False;
2188f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      }
2189f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   }
2190f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   return True;
2191f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj}
2192f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
2193f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
21947aa38a97a2a49163e465a7c85b8b9469f5c7d8e2sewardj/* Create a new, empty VTS.
21957aa38a97a2a49163e465a7c85b8b9469f5c7d8e2sewardj*/
21966bd9dc18c043927c1196caba20a327238a179c42florianstatic VTS* VTS__new ( const HChar* who, UInt sizeTS )
21977aa38a97a2a49163e465a7c85b8b9469f5c7d8e2sewardj{
21987aa38a97a2a49163e465a7c85b8b9469f5c7d8e2sewardj   VTS* vts = HG_(zalloc)(who, sizeof(VTS) + (sizeTS+1) * sizeof(ScalarTS));
21997aa38a97a2a49163e465a7c85b8b9469f5c7d8e2sewardj   tl_assert(vts->usedTS == 0);
22007aa38a97a2a49163e465a7c85b8b9469f5c7d8e2sewardj   vts->sizeTS = sizeTS;
22017aa38a97a2a49163e465a7c85b8b9469f5c7d8e2sewardj   *(ULong*)(&vts->ts[sizeTS]) = 0x0ddC0ffeeBadF00dULL;
22027aa38a97a2a49163e465a7c85b8b9469f5c7d8e2sewardj   return vts;
22037aa38a97a2a49163e465a7c85b8b9469f5c7d8e2sewardj}
22047aa38a97a2a49163e465a7c85b8b9469f5c7d8e2sewardj
22057aa38a97a2a49163e465a7c85b8b9469f5c7d8e2sewardj/* Clone this VTS.
2206f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj*/
22076bd9dc18c043927c1196caba20a327238a179c42florianstatic VTS* VTS__clone ( const HChar* who, VTS* vts )
2208f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj{
2209f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   tl_assert(vts);
22107aa38a97a2a49163e465a7c85b8b9469f5c7d8e2sewardj   tl_assert( *(ULong*)(&vts->ts[vts->sizeTS]) == 0x0ddC0ffeeBadF00dULL);
22117aa38a97a2a49163e465a7c85b8b9469f5c7d8e2sewardj   UInt nTS = vts->usedTS;
22127aa38a97a2a49163e465a7c85b8b9469f5c7d8e2sewardj   VTS* clone = VTS__new(who, nTS);
22137aa38a97a2a49163e465a7c85b8b9469f5c7d8e2sewardj   clone->id = vts->id;
22147aa38a97a2a49163e465a7c85b8b9469f5c7d8e2sewardj   clone->sizeTS = nTS;
22157aa38a97a2a49163e465a7c85b8b9469f5c7d8e2sewardj   clone->usedTS = nTS;
22167aa38a97a2a49163e465a7c85b8b9469f5c7d8e2sewardj   UInt i;
22177aa38a97a2a49163e465a7c85b8b9469f5c7d8e2sewardj   for (i = 0; i < nTS; i++) {
22187aa38a97a2a49163e465a7c85b8b9469f5c7d8e2sewardj      clone->ts[i] = vts->ts[i];
22191b9876ba9156431a687fce36959603d0fd72604fsewardj   }
22207aa38a97a2a49163e465a7c85b8b9469f5c7d8e2sewardj   tl_assert( *(ULong*)(&clone->ts[clone->sizeTS]) == 0x0ddC0ffeeBadF00dULL);
22217aa38a97a2a49163e465a7c85b8b9469f5c7d8e2sewardj   return clone;
2222f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj}
2223f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
2224f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
2225ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj/* Make a clone of a VTS with specified ThrIDs removed.  'thridsToDel'
2226ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj   must be in strictly increasing order.  We could obviously do this
2227ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj   much more efficiently (in linear time) if necessary.
2228ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj*/
22296bd9dc18c043927c1196caba20a327238a179c42florianstatic VTS* VTS__subtract ( const HChar* who, VTS* vts, XArray* thridsToDel )
2230ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj{
2231ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj   UInt i, j;
2232ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj   tl_assert(vts);
2233ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj   tl_assert(thridsToDel);
2234ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj   tl_assert( *(ULong*)(&vts->ts[vts->sizeTS]) == 0x0ddC0ffeeBadF00dULL);
2235ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj   UInt nTS = vts->usedTS;
2236ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj   /* Figure out how many ScalarTSs will remain in the output. */
2237ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj   UInt nReq = nTS;
2238ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj   for (i = 0; i < nTS; i++) {
2239ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj      ThrID thrid = vts->ts[i].thrid;
2240ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj      if (VG_(lookupXA)(thridsToDel, &thrid, NULL, NULL))
2241ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj         nReq--;
2242ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj   }
2243ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj   tl_assert(nReq <= nTS);
2244ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj   /* Copy the ones that will remain. */
2245ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj   VTS* res = VTS__new(who, nReq);
2246ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj   j = 0;
2247ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj   for (i = 0; i < nTS; i++) {
2248ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj      ThrID thrid = vts->ts[i].thrid;
2249ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj      if (VG_(lookupXA)(thridsToDel, &thrid, NULL, NULL))
2250ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj         continue;
2251ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj      res->ts[j++] = vts->ts[i];
2252ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj   }
2253ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj   tl_assert(j == nReq);
2254ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj   tl_assert(j == res->sizeTS);
2255ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj   res->usedTS = j;
2256ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj   tl_assert( *(ULong*)(&res->ts[j]) == 0x0ddC0ffeeBadF00dULL);
2257ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj   return res;
2258ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj}
2259ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj
2260ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj
2261f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj/* Delete this VTS in its entirety.
2262f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj*/
22637aa38a97a2a49163e465a7c85b8b9469f5c7d8e2sewardjstatic void VTS__delete ( VTS* vts )
2264f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj{
2265f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   tl_assert(vts);
22667aa38a97a2a49163e465a7c85b8b9469f5c7d8e2sewardj   tl_assert(vts->usedTS <= vts->sizeTS);
22677aa38a97a2a49163e465a7c85b8b9469f5c7d8e2sewardj   tl_assert( *(ULong*)(&vts->ts[vts->sizeTS]) == 0x0ddC0ffeeBadF00dULL);
2268f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   HG_(free)(vts);
2269f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj}
2270f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
2271f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
2272f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj/* Create a new singleton VTS.
2273f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj*/
22747aa38a97a2a49163e465a7c85b8b9469f5c7d8e2sewardjstatic void VTS__singleton ( /*OUT*/VTS* out, Thr* thr, ULong tym )
22757aa38a97a2a49163e465a7c85b8b9469f5c7d8e2sewardj{
2276f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   tl_assert(thr);
2277f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   tl_assert(tym >= 1);
22787aa38a97a2a49163e465a7c85b8b9469f5c7d8e2sewardj   tl_assert(out);
22797aa38a97a2a49163e465a7c85b8b9469f5c7d8e2sewardj   tl_assert(out->usedTS == 0);
22807aa38a97a2a49163e465a7c85b8b9469f5c7d8e2sewardj   tl_assert(out->sizeTS >= 1);
22817aa38a97a2a49163e465a7c85b8b9469f5c7d8e2sewardj   UInt hi = out->usedTS++;
22827aa38a97a2a49163e465a7c85b8b9469f5c7d8e2sewardj   out->ts[hi].thrid = Thr__to_ThrID(thr);
22837aa38a97a2a49163e465a7c85b8b9469f5c7d8e2sewardj   out->ts[hi].tym   = tym;
2284f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj}
2285f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
2286f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
2287f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj/* Return a new VTS in which vts[me]++, so to speak.  'vts' itself is
2288f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   not modified.
2289f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj*/
22907aa38a97a2a49163e465a7c85b8b9469f5c7d8e2sewardjstatic void VTS__tick ( /*OUT*/VTS* out, Thr* me, VTS* vts )
2291f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj{
22927aa38a97a2a49163e465a7c85b8b9469f5c7d8e2sewardj   UInt      i, n;
2293e4cce74e68f351f1dd78ba7a5a412966f8d7702csewardj   ThrID     me_thrid;
22947aa38a97a2a49163e465a7c85b8b9469f5c7d8e2sewardj   Bool      found = False;
2295c8028add6294793dfc80a80d920c7dba3a89f312sewardj
2296c8028add6294793dfc80a80d920c7dba3a89f312sewardj   stats__vts__tick++;
2297c8028add6294793dfc80a80d920c7dba3a89f312sewardj
22987aa38a97a2a49163e465a7c85b8b9469f5c7d8e2sewardj   tl_assert(out);
22997aa38a97a2a49163e465a7c85b8b9469f5c7d8e2sewardj   tl_assert(out->usedTS == 0);
23007aa38a97a2a49163e465a7c85b8b9469f5c7d8e2sewardj   if (vts->usedTS >= ThrID_MAX_VALID)
23017aa38a97a2a49163e465a7c85b8b9469f5c7d8e2sewardj      scalarts_limitations_fail_NORETURN( True/*due_to_nThrs*/ );
23027aa38a97a2a49163e465a7c85b8b9469f5c7d8e2sewardj   tl_assert(out->sizeTS >= 1 + vts->usedTS);
23037aa38a97a2a49163e465a7c85b8b9469f5c7d8e2sewardj
2304f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   tl_assert(me);
2305e4cce74e68f351f1dd78ba7a5a412966f8d7702csewardj   me_thrid = Thr__to_ThrID(me);
2306f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   tl_assert(is_sane_VTS(vts));
23077aa38a97a2a49163e465a7c85b8b9469f5c7d8e2sewardj   n = vts->usedTS;
2308f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
2309555fc57fc4c8ed646139b9e7ceaeb2857a16972csewardj   /* Copy all entries which precede 'me'. */
2310555fc57fc4c8ed646139b9e7ceaeb2857a16972csewardj   for (i = 0; i < n; i++) {
2311555fc57fc4c8ed646139b9e7ceaeb2857a16972csewardj      ScalarTS* here = &vts->ts[i];
2312555fc57fc4c8ed646139b9e7ceaeb2857a16972csewardj      if (UNLIKELY(here->thrid >= me_thrid))
2313555fc57fc4c8ed646139b9e7ceaeb2857a16972csewardj         break;
23147aa38a97a2a49163e465a7c85b8b9469f5c7d8e2sewardj      UInt hi = out->usedTS++;
2315555fc57fc4c8ed646139b9e7ceaeb2857a16972csewardj      out->ts[hi] = *here;
2316f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   }
2317f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
2318555fc57fc4c8ed646139b9e7ceaeb2857a16972csewardj   /* 'i' now indicates the next entry to copy, if any.
2319555fc57fc4c8ed646139b9e7ceaeb2857a16972csewardj       There are 3 possibilities:
2320555fc57fc4c8ed646139b9e7ceaeb2857a16972csewardj       (a) there is no next entry (we used them all up already):
2321555fc57fc4c8ed646139b9e7ceaeb2857a16972csewardj           add (me_thrid,1) to the output, and quit
2322555fc57fc4c8ed646139b9e7ceaeb2857a16972csewardj       (b) there is a next entry, and its thrid > me_thrid:
2323555fc57fc4c8ed646139b9e7ceaeb2857a16972csewardj           add (me_thrid,1) to the output, then copy the remaining entries
2324555fc57fc4c8ed646139b9e7ceaeb2857a16972csewardj       (c) there is a next entry, and its thrid == me_thrid:
2325555fc57fc4c8ed646139b9e7ceaeb2857a16972csewardj           copy it to the output but increment its timestamp value.
2326555fc57fc4c8ed646139b9e7ceaeb2857a16972csewardj           Then copy the remaining entries.  (c) is the common case.
2327555fc57fc4c8ed646139b9e7ceaeb2857a16972csewardj   */
2328555fc57fc4c8ed646139b9e7ceaeb2857a16972csewardj   tl_assert(i >= 0 && i <= n);
2329555fc57fc4c8ed646139b9e7ceaeb2857a16972csewardj   if (i == n) { /* case (a) */
2330555fc57fc4c8ed646139b9e7ceaeb2857a16972csewardj      UInt hi = out->usedTS++;
2331555fc57fc4c8ed646139b9e7ceaeb2857a16972csewardj      out->ts[hi].thrid = me_thrid;
2332555fc57fc4c8ed646139b9e7ceaeb2857a16972csewardj      out->ts[hi].tym   = 1;
2333555fc57fc4c8ed646139b9e7ceaeb2857a16972csewardj   } else {
2334555fc57fc4c8ed646139b9e7ceaeb2857a16972csewardj      /* cases (b) and (c) */
2335555fc57fc4c8ed646139b9e7ceaeb2857a16972csewardj      ScalarTS* here = &vts->ts[i];
2336555fc57fc4c8ed646139b9e7ceaeb2857a16972csewardj      if (me_thrid == here->thrid) { /* case (c) */
23377aa38a97a2a49163e465a7c85b8b9469f5c7d8e2sewardj         if (UNLIKELY(here->tym >= (1ULL << SCALARTS_N_TYMBITS) - 2ULL)) {
2338e4cce74e68f351f1dd78ba7a5a412966f8d7702csewardj            /* We're hosed.  We have to stop. */
2339e4cce74e68f351f1dd78ba7a5a412966f8d7702csewardj            scalarts_limitations_fail_NORETURN( False/*!due_to_nThrs*/ );
2340e4cce74e68f351f1dd78ba7a5a412966f8d7702csewardj         }
23417aa38a97a2a49163e465a7c85b8b9469f5c7d8e2sewardj         UInt hi = out->usedTS++;
23427aa38a97a2a49163e465a7c85b8b9469f5c7d8e2sewardj         out->ts[hi].thrid = here->thrid;
23437aa38a97a2a49163e465a7c85b8b9469f5c7d8e2sewardj         out->ts[hi].tym   = here->tym + 1;
2344f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj         i++;
2345555fc57fc4c8ed646139b9e7ceaeb2857a16972csewardj         found = True;
2346555fc57fc4c8ed646139b9e7ceaeb2857a16972csewardj      } else { /* case (b) */
23477aa38a97a2a49163e465a7c85b8b9469f5c7d8e2sewardj         UInt hi = out->usedTS++;
2348555fc57fc4c8ed646139b9e7ceaeb2857a16972csewardj         out->ts[hi].thrid = me_thrid;
2349555fc57fc4c8ed646139b9e7ceaeb2857a16972csewardj         out->ts[hi].tym   = 1;
2350f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      }
2351555fc57fc4c8ed646139b9e7ceaeb2857a16972csewardj      /* And copy any remaining entries. */
2352f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      for (/*keepgoing*/; i < n; i++) {
2353555fc57fc4c8ed646139b9e7ceaeb2857a16972csewardj         ScalarTS* here2 = &vts->ts[i];
23547aa38a97a2a49163e465a7c85b8b9469f5c7d8e2sewardj         UInt hi = out->usedTS++;
2355555fc57fc4c8ed646139b9e7ceaeb2857a16972csewardj         out->ts[hi] = *here2;
2356f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      }
2357f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   }
2358555fc57fc4c8ed646139b9e7ceaeb2857a16972csewardj
23597aa38a97a2a49163e465a7c85b8b9469f5c7d8e2sewardj   tl_assert(is_sane_VTS(out));
23607aa38a97a2a49163e465a7c85b8b9469f5c7d8e2sewardj   tl_assert(out->usedTS == vts->usedTS + (found ? 0 : 1));
23617aa38a97a2a49163e465a7c85b8b9469f5c7d8e2sewardj   tl_assert(out->usedTS <= out->sizeTS);
2362f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj}
2363f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
2364f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
2365f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj/* Return a new VTS constructed as the join (max) of the 2 args.
2366f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   Neither arg is modified.
2367f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj*/
23687aa38a97a2a49163e465a7c85b8b9469f5c7d8e2sewardjstatic void VTS__join ( /*OUT*/VTS* out, VTS* a, VTS* b )
2369f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj{
23707aa38a97a2a49163e465a7c85b8b9469f5c7d8e2sewardj   UInt     ia, ib, useda, usedb;
2371f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   ULong    tyma, tymb, tymMax;
2372e4cce74e68f351f1dd78ba7a5a412966f8d7702csewardj   ThrID    thrid;
23737aa38a97a2a49163e465a7c85b8b9469f5c7d8e2sewardj   UInt     ncommon = 0;
2374f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
2375c8028add6294793dfc80a80d920c7dba3a89f312sewardj   stats__vts__join++;
2376c8028add6294793dfc80a80d920c7dba3a89f312sewardj
23777aa38a97a2a49163e465a7c85b8b9469f5c7d8e2sewardj   tl_assert(a);
23787aa38a97a2a49163e465a7c85b8b9469f5c7d8e2sewardj   tl_assert(b);
23797aa38a97a2a49163e465a7c85b8b9469f5c7d8e2sewardj   useda = a->usedTS;
23807aa38a97a2a49163e465a7c85b8b9469f5c7d8e2sewardj   usedb = b->usedTS;
23817aa38a97a2a49163e465a7c85b8b9469f5c7d8e2sewardj
23827aa38a97a2a49163e465a7c85b8b9469f5c7d8e2sewardj   tl_assert(out);
23837aa38a97a2a49163e465a7c85b8b9469f5c7d8e2sewardj   tl_assert(out->usedTS == 0);
23847aa38a97a2a49163e465a7c85b8b9469f5c7d8e2sewardj   /* overly conservative test, but doing better involves comparing
23857aa38a97a2a49163e465a7c85b8b9469f5c7d8e2sewardj      the two VTSs, which we don't want to do at this point. */
23867aa38a97a2a49163e465a7c85b8b9469f5c7d8e2sewardj   if (useda + usedb >= ThrID_MAX_VALID)
23877aa38a97a2a49163e465a7c85b8b9469f5c7d8e2sewardj      scalarts_limitations_fail_NORETURN( True/*due_to_nThrs*/ );
23887aa38a97a2a49163e465a7c85b8b9469f5c7d8e2sewardj   tl_assert(out->sizeTS >= useda + usedb);
2389f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
2390f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   ia = ib = 0;
2391f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
2392f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   while (1) {
2393f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
2394e4cce74e68f351f1dd78ba7a5a412966f8d7702csewardj      /* This logic is to enumerate triples (thrid, tyma, tymb) drawn
2395e4cce74e68f351f1dd78ba7a5a412966f8d7702csewardj         from a and b in order, where thrid is the next ThrID
2396f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj         occurring in either a or b, and tyma/b are the relevant
2397f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj         scalar timestamps, taking into account implicit zeroes. */
2398f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      tl_assert(ia >= 0 && ia <= useda);
2399f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      tl_assert(ib >= 0 && ib <= usedb);
2400f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
24014c245e595b9f6300d3120408ca873f7115d9cc7dnjn      if        (ia == useda && ib == usedb) {
2402f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj         /* both empty - done */
2403f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj         break;
24044c245e595b9f6300d3120408ca873f7115d9cc7dnjn
24054c245e595b9f6300d3120408ca873f7115d9cc7dnjn      } else if (ia == useda && ib != usedb) {
2406f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj         /* a empty, use up b */
24077aa38a97a2a49163e465a7c85b8b9469f5c7d8e2sewardj         ScalarTS* tmpb = &b->ts[ib];
2408e4cce74e68f351f1dd78ba7a5a412966f8d7702csewardj         thrid = tmpb->thrid;
2409e4cce74e68f351f1dd78ba7a5a412966f8d7702csewardj         tyma  = 0;
2410e4cce74e68f351f1dd78ba7a5a412966f8d7702csewardj         tymb  = tmpb->tym;
2411f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj         ib++;
24124c245e595b9f6300d3120408ca873f7115d9cc7dnjn
24134c245e595b9f6300d3120408ca873f7115d9cc7dnjn      } else if (ia != useda && ib == usedb) {
2414f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj         /* b empty, use up a */
24157aa38a97a2a49163e465a7c85b8b9469f5c7d8e2sewardj         ScalarTS* tmpa = &a->ts[ia];
2416e4cce74e68f351f1dd78ba7a5a412966f8d7702csewardj         thrid = tmpa->thrid;
2417e4cce74e68f351f1dd78ba7a5a412966f8d7702csewardj         tyma  = tmpa->tym;
2418e4cce74e68f351f1dd78ba7a5a412966f8d7702csewardj         tymb  = 0;
2419f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj         ia++;
24204c245e595b9f6300d3120408ca873f7115d9cc7dnjn
24214c245e595b9f6300d3120408ca873f7115d9cc7dnjn      } else {
2422e4cce74e68f351f1dd78ba7a5a412966f8d7702csewardj         /* both not empty; extract lowest-ThrID'd triple */
24237aa38a97a2a49163e465a7c85b8b9469f5c7d8e2sewardj         ScalarTS* tmpa = &a->ts[ia];
24247aa38a97a2a49163e465a7c85b8b9469f5c7d8e2sewardj         ScalarTS* tmpb = &b->ts[ib];
2425e4cce74e68f351f1dd78ba7a5a412966f8d7702csewardj         if (tmpa->thrid < tmpb->thrid) {
2426e4cce74e68f351f1dd78ba7a5a412966f8d7702csewardj            /* a has the lowest unconsidered ThrID */
2427e4cce74e68f351f1dd78ba7a5a412966f8d7702csewardj            thrid = tmpa->thrid;
2428e4cce74e68f351f1dd78ba7a5a412966f8d7702csewardj            tyma  = tmpa->tym;
2429e4cce74e68f351f1dd78ba7a5a412966f8d7702csewardj            tymb  = 0;
2430f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj            ia++;
2431e4cce74e68f351f1dd78ba7a5a412966f8d7702csewardj         } else if (tmpa->thrid > tmpb->thrid) {
2432e4cce74e68f351f1dd78ba7a5a412966f8d7702csewardj            /* b has the lowest unconsidered ThrID */
2433e4cce74e68f351f1dd78ba7a5a412966f8d7702csewardj            thrid = tmpb->thrid;
2434e4cce74e68f351f1dd78ba7a5a412966f8d7702csewardj            tyma  = 0;
2435e4cce74e68f351f1dd78ba7a5a412966f8d7702csewardj            tymb  = tmpb->tym;
2436f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj            ib++;
2437f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj         } else {
2438e4cce74e68f351f1dd78ba7a5a412966f8d7702csewardj            /* they both next mention the same ThrID */
2439e4cce74e68f351f1dd78ba7a5a412966f8d7702csewardj            tl_assert(tmpa->thrid == tmpb->thrid);
2440e4cce74e68f351f1dd78ba7a5a412966f8d7702csewardj            thrid = tmpa->thrid; /* == tmpb->thrid */
2441e4cce74e68f351f1dd78ba7a5a412966f8d7702csewardj            tyma  = tmpa->tym;
2442e4cce74e68f351f1dd78ba7a5a412966f8d7702csewardj            tymb  = tmpb->tym;
2443f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj            ia++;
2444f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj            ib++;
24457aa38a97a2a49163e465a7c85b8b9469f5c7d8e2sewardj            ncommon++;
2446f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj         }
2447f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      }
2448f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
2449f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      /* having laboriously determined (thr, tyma, tymb), do something
2450f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj         useful with it. */
2451f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      tymMax = tyma > tymb ? tyma : tymb;
2452f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      if (tymMax > 0) {
24537aa38a97a2a49163e465a7c85b8b9469f5c7d8e2sewardj         UInt hi = out->usedTS++;
24547aa38a97a2a49163e465a7c85b8b9469f5c7d8e2sewardj         out->ts[hi].thrid = thrid;
24557aa38a97a2a49163e465a7c85b8b9469f5c7d8e2sewardj         out->ts[hi].tym   = tymMax;
2456f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      }
2457f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
2458f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   }
2459f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
24607aa38a97a2a49163e465a7c85b8b9469f5c7d8e2sewardj   tl_assert(is_sane_VTS(out));
24617aa38a97a2a49163e465a7c85b8b9469f5c7d8e2sewardj   tl_assert(out->usedTS <= out->sizeTS);
24627aa38a97a2a49163e465a7c85b8b9469f5c7d8e2sewardj   tl_assert(out->usedTS == useda + usedb - ncommon);
2463f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj}
2464f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
2465f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
2466e4cce74e68f351f1dd78ba7a5a412966f8d7702csewardj/* Determine if 'a' <= 'b', in the partial ordering.  Returns zero if
2467e4cce74e68f351f1dd78ba7a5a412966f8d7702csewardj   they are, or the first ThrID for which they are not (no valid ThrID
2468e4cce74e68f351f1dd78ba7a5a412966f8d7702csewardj   has the value zero).  This rather strange convention is used
2469e4cce74e68f351f1dd78ba7a5a412966f8d7702csewardj   because sometimes we want to know the actual index at which they
2470e4cce74e68f351f1dd78ba7a5a412966f8d7702csewardj   first differ. */
2471e4cce74e68f351f1dd78ba7a5a412966f8d7702csewardjstatic UInt/*ThrID*/ VTS__cmpLEQ ( VTS* a, VTS* b )
2472f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj{
247323f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj   Word  ia, ib, useda, usedb;
247423f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj   ULong tyma, tymb;
2475f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
2476c8028add6294793dfc80a80d920c7dba3a89f312sewardj   stats__vts__cmpLEQ++;
2477c8028add6294793dfc80a80d920c7dba3a89f312sewardj
24787aa38a97a2a49163e465a7c85b8b9469f5c7d8e2sewardj   tl_assert(a);
24797aa38a97a2a49163e465a7c85b8b9469f5c7d8e2sewardj   tl_assert(b);
24807aa38a97a2a49163e465a7c85b8b9469f5c7d8e2sewardj   useda = a->usedTS;
24817aa38a97a2a49163e465a7c85b8b9469f5c7d8e2sewardj   usedb = b->usedTS;
2482f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
2483f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   ia = ib = 0;
2484f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
2485f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   while (1) {
2486f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
24874c245e595b9f6300d3120408ca873f7115d9cc7dnjn      /* This logic is to enumerate doubles (tyma, tymb) drawn
24884c245e595b9f6300d3120408ca873f7115d9cc7dnjn         from a and b in order, and tyma/b are the relevant
2489f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj         scalar timestamps, taking into account implicit zeroes. */
2490e4cce74e68f351f1dd78ba7a5a412966f8d7702csewardj      ThrID thrid;
249123f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj
2492f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      tl_assert(ia >= 0 && ia <= useda);
2493f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      tl_assert(ib >= 0 && ib <= usedb);
2494f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
24954c245e595b9f6300d3120408ca873f7115d9cc7dnjn      if        (ia == useda && ib == usedb) {
2496f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj         /* both empty - done */
2497f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj         break;
24984c245e595b9f6300d3120408ca873f7115d9cc7dnjn
24994c245e595b9f6300d3120408ca873f7115d9cc7dnjn      } else if (ia == useda && ib != usedb) {
2500f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj         /* a empty, use up b */
25017aa38a97a2a49163e465a7c85b8b9469f5c7d8e2sewardj         ScalarTS* tmpb = &b->ts[ib];
2502e4cce74e68f351f1dd78ba7a5a412966f8d7702csewardj         tyma  = 0;
2503e4cce74e68f351f1dd78ba7a5a412966f8d7702csewardj         tymb  = tmpb->tym;
2504e4cce74e68f351f1dd78ba7a5a412966f8d7702csewardj         thrid = tmpb->thrid;
2505f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj         ib++;
25064c245e595b9f6300d3120408ca873f7115d9cc7dnjn
25074c245e595b9f6300d3120408ca873f7115d9cc7dnjn      } else if (ia != useda && ib == usedb) {
2508f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj         /* b empty, use up a */
25097aa38a97a2a49163e465a7c85b8b9469f5c7d8e2sewardj         ScalarTS* tmpa = &a->ts[ia];
2510e4cce74e68f351f1dd78ba7a5a412966f8d7702csewardj         tyma  = tmpa->tym;
2511e4cce74e68f351f1dd78ba7a5a412966f8d7702csewardj         thrid = tmpa->thrid;
2512e4cce74e68f351f1dd78ba7a5a412966f8d7702csewardj         tymb  = 0;
2513f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj         ia++;
25144c245e595b9f6300d3120408ca873f7115d9cc7dnjn
25154c245e595b9f6300d3120408ca873f7115d9cc7dnjn      } else {
2516e4cce74e68f351f1dd78ba7a5a412966f8d7702csewardj         /* both not empty; extract lowest-ThrID'd triple */
25177aa38a97a2a49163e465a7c85b8b9469f5c7d8e2sewardj         ScalarTS* tmpa = &a->ts[ia];
25187aa38a97a2a49163e465a7c85b8b9469f5c7d8e2sewardj         ScalarTS* tmpb = &b->ts[ib];
2519e4cce74e68f351f1dd78ba7a5a412966f8d7702csewardj         if (tmpa->thrid < tmpb->thrid) {
2520e4cce74e68f351f1dd78ba7a5a412966f8d7702csewardj            /* a has the lowest unconsidered ThrID */
2521e4cce74e68f351f1dd78ba7a5a412966f8d7702csewardj            tyma  = tmpa->tym;
2522e4cce74e68f351f1dd78ba7a5a412966f8d7702csewardj            thrid = tmpa->thrid;
2523e4cce74e68f351f1dd78ba7a5a412966f8d7702csewardj            tymb  = 0;
2524f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj            ia++;
2525f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj         }
2526f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj         else
2527e4cce74e68f351f1dd78ba7a5a412966f8d7702csewardj         if (tmpa->thrid > tmpb->thrid) {
2528e4cce74e68f351f1dd78ba7a5a412966f8d7702csewardj            /* b has the lowest unconsidered ThrID */
2529e4cce74e68f351f1dd78ba7a5a412966f8d7702csewardj            tyma  = 0;
2530e4cce74e68f351f1dd78ba7a5a412966f8d7702csewardj            tymb  = tmpb->tym;
2531e4cce74e68f351f1dd78ba7a5a412966f8d7702csewardj            thrid = tmpb->thrid;
2532f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj            ib++;
2533f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj         } else {
2534e4cce74e68f351f1dd78ba7a5a412966f8d7702csewardj            /* they both next mention the same ThrID */
2535e4cce74e68f351f1dd78ba7a5a412966f8d7702csewardj            tl_assert(tmpa->thrid == tmpb->thrid);
2536e4cce74e68f351f1dd78ba7a5a412966f8d7702csewardj            tyma  = tmpa->tym;
2537e4cce74e68f351f1dd78ba7a5a412966f8d7702csewardj            thrid = tmpa->thrid;
2538e4cce74e68f351f1dd78ba7a5a412966f8d7702csewardj            tymb  = tmpb->tym;
2539f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj            ia++;
2540f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj            ib++;
2541f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj         }
2542f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      }
2543f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
25444c245e595b9f6300d3120408ca873f7115d9cc7dnjn      /* having laboriously determined (tyma, tymb), do something
2545f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj         useful with it. */
254623f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj      if (tyma > tymb) {
254723f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj         /* not LEQ at this index.  Quit, since the answer is
254823f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj            determined already. */
2549e4cce74e68f351f1dd78ba7a5a412966f8d7702csewardj         tl_assert(thrid >= 1024);
2550e4cce74e68f351f1dd78ba7a5a412966f8d7702csewardj         return thrid;
255123f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj      }
2552f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   }
2553f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
2554e4cce74e68f351f1dd78ba7a5a412966f8d7702csewardj   return 0; /* all points are LEQ => return an invalid ThrID */
2555f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj}
2556f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
2557f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
2558f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj/* Compute an arbitrary structural (total) ordering on the two args,
2559f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   based on their VCs, so they can be looked up in a table, tree, etc.
2560c8028add6294793dfc80a80d920c7dba3a89f312sewardj   Returns -1, 0 or 1.  (really just 'deriving Ord' :-) This can be
2561c8028add6294793dfc80a80d920c7dba3a89f312sewardj   performance critical so there is some effort expended to make it sa
2562c8028add6294793dfc80a80d920c7dba3a89f312sewardj   fast as possible.
2563f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj*/
2564f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardjWord VTS__cmp_structural ( VTS* a, VTS* b )
2565f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj{
2566f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   /* We just need to generate an arbitrary total ordering based on
2567f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      a->ts and b->ts.  Preferably do it in a way which comes across likely
2568f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      differences relatively quickly. */
2569c8028add6294793dfc80a80d920c7dba3a89f312sewardj   Word     i;
2570c8028add6294793dfc80a80d920c7dba3a89f312sewardj   Word     useda = 0,    usedb = 0;
2571c8028add6294793dfc80a80d920c7dba3a89f312sewardj   ScalarTS *ctsa = NULL, *ctsb = NULL;
2572c8028add6294793dfc80a80d920c7dba3a89f312sewardj
2573c8028add6294793dfc80a80d920c7dba3a89f312sewardj   stats__vts__cmp_structural++;
2574c8028add6294793dfc80a80d920c7dba3a89f312sewardj
2575c8028add6294793dfc80a80d920c7dba3a89f312sewardj   tl_assert(a);
2576c8028add6294793dfc80a80d920c7dba3a89f312sewardj   tl_assert(b);
2577c8028add6294793dfc80a80d920c7dba3a89f312sewardj
25787aa38a97a2a49163e465a7c85b8b9469f5c7d8e2sewardj   ctsa = &a->ts[0]; useda = a->usedTS;
25797aa38a97a2a49163e465a7c85b8b9469f5c7d8e2sewardj   ctsb = &b->ts[0]; usedb = b->usedTS;
2580c8028add6294793dfc80a80d920c7dba3a89f312sewardj
2581c8028add6294793dfc80a80d920c7dba3a89f312sewardj   if (LIKELY(useda == usedb)) {
2582c8028add6294793dfc80a80d920c7dba3a89f312sewardj      ScalarTS *tmpa = NULL, *tmpb = NULL;
2583c8028add6294793dfc80a80d920c7dba3a89f312sewardj      stats__vts__cmp_structural_slow++;
2584c8028add6294793dfc80a80d920c7dba3a89f312sewardj      /* Same length vectors.  Find the first difference, if any, as
2585c8028add6294793dfc80a80d920c7dba3a89f312sewardj         fast as possible. */
2586c8028add6294793dfc80a80d920c7dba3a89f312sewardj      for (i = 0; i < useda; i++) {
2587c8028add6294793dfc80a80d920c7dba3a89f312sewardj         tmpa = &ctsa[i];
2588c8028add6294793dfc80a80d920c7dba3a89f312sewardj         tmpb = &ctsb[i];
2589e4cce74e68f351f1dd78ba7a5a412966f8d7702csewardj         if (LIKELY(tmpa->tym == tmpb->tym
2590e4cce74e68f351f1dd78ba7a5a412966f8d7702csewardj                    && tmpa->thrid == tmpb->thrid))
2591c8028add6294793dfc80a80d920c7dba3a89f312sewardj            continue;
2592c8028add6294793dfc80a80d920c7dba3a89f312sewardj         else
2593c8028add6294793dfc80a80d920c7dba3a89f312sewardj            break;
2594c8028add6294793dfc80a80d920c7dba3a89f312sewardj      }
2595c8028add6294793dfc80a80d920c7dba3a89f312sewardj      if (UNLIKELY(i == useda)) {
2596c8028add6294793dfc80a80d920c7dba3a89f312sewardj         /* They're identical. */
2597c8028add6294793dfc80a80d920c7dba3a89f312sewardj         return 0;
2598c8028add6294793dfc80a80d920c7dba3a89f312sewardj      } else {
2599c8028add6294793dfc80a80d920c7dba3a89f312sewardj         tl_assert(i >= 0 && i < useda);
2600c8028add6294793dfc80a80d920c7dba3a89f312sewardj         if (tmpa->tym < tmpb->tym) return -1;
2601c8028add6294793dfc80a80d920c7dba3a89f312sewardj         if (tmpa->tym > tmpb->tym) return 1;
2602e4cce74e68f351f1dd78ba7a5a412966f8d7702csewardj         if (tmpa->thrid < tmpb->thrid) return -1;
2603e4cce74e68f351f1dd78ba7a5a412966f8d7702csewardj         if (tmpa->thrid > tmpb->thrid) return 1;
2604c8028add6294793dfc80a80d920c7dba3a89f312sewardj         /* we just established them as non-identical, hence: */
2605c8028add6294793dfc80a80d920c7dba3a89f312sewardj      }
2606c8028add6294793dfc80a80d920c7dba3a89f312sewardj      /*NOTREACHED*/
2607c8028add6294793dfc80a80d920c7dba3a89f312sewardj      tl_assert(0);
2608c8028add6294793dfc80a80d920c7dba3a89f312sewardj   }
2609f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
2610f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   if (useda < usedb) return -1;
2611f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   if (useda > usedb) return 1;
2612c8028add6294793dfc80a80d920c7dba3a89f312sewardj   /*NOTREACHED*/
2613c8028add6294793dfc80a80d920c7dba3a89f312sewardj   tl_assert(0);
2614f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj}
2615f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
2616f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
2617b28fe8941785b44e63dfedd843e8107ee5a7cf2eflorian/* Debugging only.  Display the given VTS.
2618f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj*/
/* Output goes through VG_(printf) and has the shape
   "[thrid:tym thrid:tym ...]": one "thrid:tym" pair for each of
   vts->ts[0 .. usedTS-1], separated by single spaces, with no space
   before the closing bracket.  A VTS with usedTS == 0 prints as "[]".
   Asserts that vts is non-NULL.  Returns nothing. */
2619b28fe8941785b44e63dfedd843e8107ee5a7cf2eflorianstatic void VTS__show ( const VTS* vts )
26207aa38a97a2a49163e465a7c85b8b9469f5c7d8e2sewardj{
2621f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   Word      i, n;
26224367abed70050175860cce60990fd2ab3ca805b9florian   tl_assert(vts);
2623b28fe8941785b44e63dfedd843e8107ee5a7cf2eflorian
2624b28fe8941785b44e63dfedd843e8107ee5a7cf2eflorian   VG_(printf)("[");
26257aa38a97a2a49163e465a7c85b8b9469f5c7d8e2sewardj   n =  vts->usedTS;
2626f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   for (i = 0; i < n; i++) {
2627b28fe8941785b44e63dfedd843e8107ee5a7cf2eflorian      const ScalarTS *st = &vts->ts[i];
      /* Every pair except the last one gets a trailing separator
         space; tym is widened to ULong to match the %llu conversion. */
26285e5cb009574352880f1bc530e1a73ddaae5003fcflorian      VG_(printf)(i < n-1 ? "%d:%llu " : "%d:%llu", st->thrid, (ULong)st->tym);
2629f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   }
2630b28fe8941785b44e63dfedd843e8107ee5a7cf2eflorian   VG_(printf)("]");
2631f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj}
2632f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
2633f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
2634f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj/* Debugging only.  Return vts[index], so to speak.
2635f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj*/
/* Linear scan of vts->ts[0 .. usedTS-1] for the entry whose thrid
   equals Thr__to_ThrID(idx).  Returns that entry's tym, or 0 when no
   entry carries that thread id.  Every call increments
   stats__vts__indexat_slow.  Asserts that vts is non-NULL. */
26367aa38a97a2a49163e465a7c85b8b9469f5c7d8e2sewardjULong VTS__indexAt_SLOW ( VTS* vts, Thr* idx )
26377aa38a97a2a49163e465a7c85b8b9469f5c7d8e2sewardj{
2638f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   UWord i, n;
   /* Convert once up front so the loop compares plain ThrIDs. */
2639e4cce74e68f351f1dd78ba7a5a412966f8d7702csewardj   ThrID idx_thrid = Thr__to_ThrID(idx);
2640c8028add6294793dfc80a80d920c7dba3a89f312sewardj   stats__vts__indexat_slow++;
26414367abed70050175860cce60990fd2ab3ca805b9florian   tl_assert(vts);
26427aa38a97a2a49163e465a7c85b8b9469f5c7d8e2sewardj   n = vts->usedTS;
2643f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   for (i = 0; i < n; i++) {
26447aa38a97a2a49163e465a7c85b8b9469f5c7d8e2sewardj      ScalarTS* st = &vts->ts[i];
2645e4cce74e68f351f1dd78ba7a5a412966f8d7702csewardj      if (st->thrid == idx_thrid)
2646f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj         return st->tym;
2647f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   }
   /* No entry for this thread. */
2648f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   return 0;
2649f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj}
2650f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
2651f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
2652ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj/* See comment on prototype above.
2653ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj*/
/* In outline: asserts that both thr->llexit_done and
   thr->joinedwith_done are set, appends the thread's ThrID to
   verydead_thread_table_not_pruned, then releases the thread's
   references on viR and viW (VtsID__rcdec) and overwrites both fields
   with VtsID_INVALID. */
2654ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardjstatic void VTS__declare_thread_very_dead ( Thr* thr )
2655ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj{
   /* Compiled-out debug trace; the condition is the constant 0. */
2656ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj   if (0) VG_(printf)("VTQ:  tae %p\n", thr);
2657ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj
2658ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj   tl_assert(thr->llexit_done);
2659ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj   tl_assert(thr->joinedwith_done);
2660ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj
2661ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj   ThrID nyu;
2662ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj   nyu = Thr__to_ThrID(thr);
   /* The address of the local is handed over; presumably
      VG_(addToXA) copies the element rather than keeping the
      pointer -- confirm against the XArray API. */
2663c3508652c3a00c0d0035603a7d738f2fe47e9331philippe   VG_(addToXA)( verydead_thread_table_not_pruned, &nyu );
2664ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj
2665ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj   /* We can only get here if we're assured that we'll never again
2666ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj      need to look at this thread's ::viR or ::viW.  Set them to
2667ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj      VtsID_INVALID, partly so as to avoid holding on to the VTSs, but
2668ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj      mostly so that we don't wind up pruning them (as that would be
2669ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj      nonsensical: the only interesting ScalarTS entry for a dead
2670ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj      thread is its own index, and the pruning will remove that.). */
   /* The references are dropped before the fields are overwritten,
      while the old ids are still available. */
2671ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj   VtsID__rcdec(thr->viR);
2672ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj   VtsID__rcdec(thr->viW);
2673ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj   thr->viR = VtsID_INVALID;
2674ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj   thr->viW = VtsID_INVALID;
2675ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj}
2676ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj
2677ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj
2678f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj/////////////////////////////////////////////////////////////////
2679f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj/////////////////////////////////////////////////////////////////
2680f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj//                                                             //
2681f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj// SECTION END vts primitives                                  //
2682f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj//                                                             //
2683f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj/////////////////////////////////////////////////////////////////
2684f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj/////////////////////////////////////////////////////////////////
2685f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
2686f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
2687f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
2688f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj/////////////////////////////////////////////////////////////////
2689f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj/////////////////////////////////////////////////////////////////
2690f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj//                                                             //
2691f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj// SECTION BEGIN main library                                  //
2692f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj//                                                             //
2693f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj/////////////////////////////////////////////////////////////////
2694f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj/////////////////////////////////////////////////////////////////
2695f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
2696f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
2697f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj/////////////////////////////////////////////////////////
2698f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj//                                                     //
2699f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj// VTS set                                             //
2700f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj//                                                     //
2701f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj/////////////////////////////////////////////////////////
2702f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
/* The set of VTSs held by the library.  Keys are VTS* values (stored
   as UWord) ordered by VTS__cmp_structural, as set up in vts_set_init;
   the value word is unused and always 0.  NULL until vts_set_init
   has run. */
2703ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardjstatic WordFM* /* WordFM VTS* void */ vts_set = NULL;
2704f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
/* Creates vts_set.  Asserts that it has not been created already,
   then calls VG_(newFM) with HG_(zalloc) and HG_(free) as the
   allocator pair, the string "libhb.vts_set_init.1" passed alongside
   the allocator, and VTS__cmp_structural as the key comparison. */
2705f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardjstatic void vts_set_init ( void )
2706f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj{
2707f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   tl_assert(!vts_set);
2708f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   vts_set = VG_(newFM)( HG_(zalloc), "libhb.vts_set_init.1",
2709f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj                         HG_(free),
                         /* Keys are VTS* carried as UWord, hence the
                            cast of the comparator to the
                            (UWord,UWord) signature. */
2710f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj                         (Word(*)(UWord,UWord))VTS__cmp_structural );
2711f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj}
2712f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
27137aa38a97a2a49163e465a7c85b8b9469f5c7d8e2sewardj/* Given a VTS, look in vts_set to see if we already have a
27147aa38a97a2a49163e465a7c85b8b9469f5c7d8e2sewardj   structurally identical one.  If yes, return the pair (True, pointer
27157aa38a97a2a49163e465a7c85b8b9469f5c7d8e2sewardj   to the existing one).  If no, clone this one, add the clone to the
27167aa38a97a2a49163e465a7c85b8b9469f5c7d8e2sewardj   set, and return (False, pointer to the clone). */
/* The Bool is the function result; the pointer is written to *res.
   cand must not have a VtsID yet (cand->id == VtsID_INVALID is
   asserted) and cand itself is never inserted into the set -- only
   clones made here are.  stats__vts_set__focaa counts every call and
   stats__vts_set__focaa_a counts the calls that had to clone. */
27177aa38a97a2a49163e465a7c85b8b9469f5c7d8e2sewardjstatic Bool vts_set__find__or__clone_and_add ( /*OUT*/VTS** res, VTS* cand )
2718f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj{
2719f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   UWord keyW, valW;
27207aa38a97a2a49163e465a7c85b8b9469f5c7d8e2sewardj   stats__vts_set__focaa++;
27217aa38a97a2a49163e465a7c85b8b9469f5c7d8e2sewardj   tl_assert(cand->id == VtsID_INVALID);
2722f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   /* lookup cand (by value) */
2723f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   if (VG_(lookupFM)( vts_set, &keyW, &valW, (UWord)cand )) {
2724f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      /* found it */
      /* Values in vts_set are always stored as 0 (see the addToFM
         call below), so anything else indicates corruption. */
2725f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      tl_assert(valW == 0);
2726f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      /* if this fails, cand (by ref) was already present (!) */
2727f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      tl_assert(keyW != (UWord)cand);
      /* keyW is the stored key, i.e. the pointer to the set's copy. */
27287aa38a97a2a49163e465a7c85b8b9469f5c7d8e2sewardj      *res = (VTS*)keyW;
27297aa38a97a2a49163e465a7c85b8b9469f5c7d8e2sewardj      return True;
2730f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   } else {
27317aa38a97a2a49163e465a7c85b8b9469f5c7d8e2sewardj      /* not present.  Clone, add and return address of clone. */
27327aa38a97a2a49163e465a7c85b8b9469f5c7d8e2sewardj      stats__vts_set__focaa_a++;
27337aa38a97a2a49163e465a7c85b8b9469f5c7d8e2sewardj      VTS* clone = VTS__clone( "libhb.vts_set_focaa.1", cand );
27347aa38a97a2a49163e465a7c85b8b9469f5c7d8e2sewardj      tl_assert(clone != cand);
27357aa38a97a2a49163e465a7c85b8b9469f5c7d8e2sewardj      VG_(addToFM)( vts_set, (UWord)clone, 0/*val is unused*/ );
27367aa38a97a2a49163e465a7c85b8b9469f5c7d8e2sewardj      *res = clone;
27377aa38a97a2a49163e465a7c85b8b9469f5c7d8e2sewardj      return False;
2738f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   }
2739f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj}
2740f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
2741f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
2742f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj/////////////////////////////////////////////////////////
2743f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj//                                                     //
2744f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj// VTS table                                           //
2745f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj//                                                     //
2746f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj/////////////////////////////////////////////////////////
2747f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
/* Forward declaration: vts_tab__do_GC, further down in this section,
   calls this function at the start of a GC pass. */
2748f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardjstatic void VtsID__invalidate_caches ( void ); /* fwds */
2749f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
2750f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj/* A type to hold VTS table entries.  Invariants:
2751f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   If .vts == NULL, then this entry is not in use, so:
2752f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   - .rc == 0
2753f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   - this entry is on the freelist (unfortunately, does not imply
2754a1ac2f4d9512ce19d63861226652d4d5e5f8955cphilippe     any constraints on value for u.freelink)
2755f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   If .vts != NULL, then this entry is in use:
2756f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   - .vts is findable in vts_set
2757f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   - .vts->id == this entry number
2758f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   - no specific value for .rc (even 0 is OK)
2759a1ac2f4d9512ce19d63861226652d4d5e5f8955cphilippe   - this entry is not on freelist, so u.freelink == VtsID_INVALID
2760f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj*/
/* freelink and remap share storage (union u), so only one of the two
   is meaningful at any moment.  Within this section u.freelink is
   written by get_new_VtsID, add_to_free_list and
   vts_tab__find__or__clone_and_add and read by get_from_free_list;
   u.remap is read by remap_VtsID. */
2761f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardjtypedef
2762f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   struct {
2763f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      VTS*  vts;      /* vts, in vts_set */
2764f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      UWord rc;       /* reference count - enough for entire aspace */
2765a1ac2f4d9512ce19d63861226652d4d5e5f8955cphilippe      union {
2766a1ac2f4d9512ce19d63861226652d4d5e5f8955cphilippe         VtsID freelink; /* chain for free entries, VtsID_INVALID at end */
2767a1ac2f4d9512ce19d63861226652d4d5e5f8955cphilippe         VtsID remap;    /* used only during pruning, for used entries */
2768a1ac2f4d9512ce19d63861226652d4d5e5f8955cphilippe      } u;
2769a1ac2f4d9512ce19d63861226652d4d5e5f8955cphilippe      /* u.freelink only used when vts == NULL,
2770a1ac2f4d9512ce19d63861226652d4d5e5f8955cphilippe         u.remap only used when vts != NULL, during pruning. */
2771f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   }
2772f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   VtsTE;
2773f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
2774f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj/* The VTS table. */
/* Indexed by VtsID through VG_(indexXA).  NULL until vts_tab_init
   has run; get_new_VtsID appends one blank entry at a time when the
   free list is empty. */
2775f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardjstatic XArray* /* of VtsTE */ vts_tab = NULL;
2776f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
2777f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj/* An index into the VTS table, indicating the start of the list of
2778f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   free (available for use) entries.  If the list is empty, this is
2779f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   VtsID_INVALID. */
/* Entries are chained through VtsTE.u.freelink; add_to_free_list
   pushes at the head and get_from_free_list pops from the head. */
2780f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardjstatic VtsID vts_tab_freelist = VtsID_INVALID;
2781f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
2782f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj/* Do a GC of vts_tab when the freelist becomes empty AND the size of
2783f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   vts_tab equals or exceeds this size.  After GC, the value here is
2784f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   set appropriately so as to check for the next GC point. */
/* vts_tab__do_GC recomputes this as twice the number of live entries,
   but never less than the table size at the time of that GC. */
2785f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardjstatic Word vts_next_GC_at = 1000;
2786f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
/* Creates vts_tab as an XArray whose element size is sizeof(VtsTE),
   using HG_(zalloc) / HG_(free) as the allocator pair, and marks the
   free list as empty.  Unlike vts_set_init, this does not assert that
   vts_tab is still NULL beforehand. */
2787f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardjstatic void vts_tab_init ( void )
2788f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj{
278991ed8ccd3dae8a6abfaa45cc0d250df47b45187fflorian   vts_tab = VG_(newXA)( HG_(zalloc), "libhb.vts_tab_init.1",
279091ed8ccd3dae8a6abfaa45cc0d250df47b45187fflorian                         HG_(free), sizeof(VtsTE) );
279191ed8ccd3dae8a6abfaa45cc0d250df47b45187fflorian   vts_tab_freelist = VtsID_INVALID;
2792f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj}
2793f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
2794f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj/* Add ii to the free list, checking that it looks out-of-use. */
/* "Looks out-of-use" means all three of vts == NULL, rc == 0 and
   u.freelink == VtsID_INVALID, each of which is asserted.  The entry
   is pushed at the head: it takes the old head as its freelink and
   becomes the new vts_tab_freelist. */
2795f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardjstatic void add_to_free_list ( VtsID ii )
2796f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj{
2797f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   VtsTE* ie = VG_(indexXA)( vts_tab, ii );
2798f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   tl_assert(ie->vts == NULL);
2799f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   tl_assert(ie->rc == 0);
2800a1ac2f4d9512ce19d63861226652d4d5e5f8955cphilippe   tl_assert(ie->u.freelink == VtsID_INVALID);
2801a1ac2f4d9512ce19d63861226652d4d5e5f8955cphilippe   ie->u.freelink = vts_tab_freelist;
2802f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   vts_tab_freelist = ii;
2803f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj}
2804f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
2805f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj/* Get an entry from the free list.  This will return VtsID_INVALID if
2806f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   the free list is empty. */
/* Otherwise pops the head: returns its index and advances
   vts_tab_freelist to the head's u.freelink.  Asserts that the popped
   entry has vts == NULL and rc == 0.  The popped entry's u.freelink
   is not reset here; vts_tab__find__or__clone_and_add overwrites it
   with VtsID_INVALID when it puts the entry into use. */
2807f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardjstatic VtsID get_from_free_list ( void )
2808f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj{
2809f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   VtsID  ii;
2810f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   VtsTE* ie;
2811f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   if (vts_tab_freelist == VtsID_INVALID)
2812f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      return VtsID_INVALID;
2813f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   ii = vts_tab_freelist;
2814f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   ie = VG_(indexXA)( vts_tab, ii );
2815f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   tl_assert(ie->vts == NULL);
2816f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   tl_assert(ie->rc == 0);
   /* Unlink: the successor (possibly VtsID_INVALID) becomes the head. */
2817a1ac2f4d9512ce19d63861226652d4d5e5f8955cphilippe   vts_tab_freelist = ie->u.freelink;
2818f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   return ii;
2819f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj}
2820f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
2821f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj/* Produce a new VtsID that can be used, either by getting it from
2822f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   the freelist, or, if that is empty, by expanding vts_tab. */
/* Either way the returned index refers to an entry with vts == NULL
   and rc == 0 (asserted by get_from_free_list on the first path, set
   explicitly here on the second); filling the entry in is left to
   the caller. */
2823f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardjstatic VtsID get_new_VtsID ( void )
2824f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj{
2825f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   VtsID ii;
2826f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   VtsTE te;
2827f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   ii = get_from_free_list();
2828f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   if (ii != VtsID_INVALID)
2829f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      return ii;
   /* Free list was empty: append a blank entry and use the value
      VG_(addToXA) returns as its index. */
2830f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   te.vts = NULL;
2831f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   te.rc = 0;
2832a1ac2f4d9512ce19d63861226652d4d5e5f8955cphilippe   te.u.freelink = VtsID_INVALID;
2833f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   ii = (VtsID)VG_(addToXA)( vts_tab, &te );
2834f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   return ii;
2835f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj}
2836f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
2837f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
2838f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj/* Indirect callback from lib_zsm. */
/* Adds one reference to vts_tab entry ii.  Asserts that the entry is
   in use (vts != NULL), that vts->id matches ii, and that rc is still
   below ~0UL so the increment cannot wrap. */
2839f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardjstatic void VtsID__rcinc ( VtsID ii )
2840f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj{
2841f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   VtsTE* ie;
2842f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   /* VG_(indexXA) does a range check for us */
2843f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   ie = VG_(indexXA)( vts_tab, ii );
2844f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   tl_assert(ie->vts); /* else it's not in use */
2845f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   tl_assert(ie->rc < ~0UL); /* else we can't continue */
2846f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   tl_assert(ie->vts->id == ii);
2847f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   ie->rc++;
2848f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj}
2849f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
2850f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj/* Indirect callback from lib_zsm. */
/* Drops one reference from vts_tab entry ii.  Asserts that the entry
   is in use (vts != NULL), that vts->id matches ii, and that rc is
   non-zero before decrementing.  An entry whose rc reaches 0 is left
   in place here; vts_tab__do_GC is what frees zero-rc entries. */
2851f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardjstatic void VtsID__rcdec ( VtsID ii )
2852f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj{
2853f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   VtsTE* ie;
2854f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   /* VG_(indexXA) does a range check for us */
2855f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   ie = VG_(indexXA)( vts_tab, ii );
2856f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   tl_assert(ie->vts); /* else it's not in use */
2857f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   tl_assert(ie->rc > 0); /* else RC snafu */
2858f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   tl_assert(ie->vts->id == ii);
2859f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   ie->rc--;
2860f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj}
2861f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
2862f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
28637aa38a97a2a49163e465a7c85b8b9469f5c7d8e2sewardj/* Look up 'cand' in our collection of VTSs.  If present, return the
28647aa38a97a2a49163e465a7c85b8b9469f5c7d8e2sewardj   VtsID for the pre-existing version.  If not present, clone it, add
28657aa38a97a2a49163e465a7c85b8b9469f5c7d8e2sewardj   the clone to both vts_tab and vts_set, allocate a fresh VtsID for
28667aa38a97a2a49163e465a7c85b8b9469f5c7d8e2sewardj   it, and return that. */
/* cand must not have a VtsID yet (asserted) and is not retained: the
   table only ever points at the copy held in vts_set.  No reference
   is taken on the caller's behalf -- a newly created entry starts
   with rc == 0 and an existing entry's rc is left untouched. */
28677aa38a97a2a49163e465a7c85b8b9469f5c7d8e2sewardjstatic VtsID vts_tab__find__or__clone_and_add ( VTS* cand )
2868f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj{
28697aa38a97a2a49163e465a7c85b8b9469f5c7d8e2sewardj   VTS* in_tab = NULL;
2870f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   tl_assert(cand->id == VtsID_INVALID);
   /* in_tab receives either the pre-existing VTS or the new clone. */
28717aa38a97a2a49163e465a7c85b8b9469f5c7d8e2sewardj   Bool already_have = vts_set__find__or__clone_and_add( &in_tab, cand );
28727aa38a97a2a49163e465a7c85b8b9469f5c7d8e2sewardj   tl_assert(in_tab);
28737aa38a97a2a49163e465a7c85b8b9469f5c7d8e2sewardj   if (already_have) {
28747aa38a97a2a49163e465a7c85b8b9469f5c7d8e2sewardj      /* We already have a copy of 'cand'.  Use that. */
2875f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      VtsTE* ie;
28767aa38a97a2a49163e465a7c85b8b9469f5c7d8e2sewardj      tl_assert(in_tab->id != VtsID_INVALID);
      /* Cross-check that the table slot named by the VTS points
         back at this very VTS. */
28777aa38a97a2a49163e465a7c85b8b9469f5c7d8e2sewardj      ie = VG_(indexXA)( vts_tab, in_tab->id );
28787aa38a97a2a49163e465a7c85b8b9469f5c7d8e2sewardj      tl_assert(ie->vts == in_tab);
28797aa38a97a2a49163e465a7c85b8b9469f5c7d8e2sewardj      return in_tab->id;
2880f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   } else {
      /* The clone is new to vts_set: give it a table slot, fill the
         slot in, and record the slot number in the VTS itself. */
2881f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      VtsID  ii = get_new_VtsID();
2882f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      VtsTE* ie = VG_(indexXA)( vts_tab, ii );
28837aa38a97a2a49163e465a7c85b8b9469f5c7d8e2sewardj      ie->vts = in_tab;
2884f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      ie->rc = 0;
      /* A slot popped from the free list may still carry its old
         link, so clear it now that the slot is in use. */
2885a1ac2f4d9512ce19d63861226652d4d5e5f8955cphilippe      ie->u.freelink = VtsID_INVALID;
28867aa38a97a2a49163e465a7c85b8b9469f5c7d8e2sewardj      in_tab->id = ii;
2887f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      return ii;
2888f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   }
2889f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj}
2890f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
2891f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
/* Prints a short summary of the VTS bookkeeping via VG_(printf):
   the caller-supplied label, the size of vts_tab, the number of
   vts_tab entries that are in use (vts != NULL), the size of vts_set,
   and the sum of the reference counts of the in-use entries.  While
   walking the table it asserts that every unused entry has rc == 0. */
28926bd9dc18c043927c1196caba20a327238a179c42florianstatic void show_vts_stats ( const HChar* caller )
2893f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj{
2894f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   UWord nSet, nTab, nLive;
2895f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   ULong totrc;
2896f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   UWord n, i;
2897f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   nSet = VG_(sizeFM)( vts_set );
2898f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   nTab = VG_(sizeXA)( vts_tab );
2899f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   totrc = 0;
2900f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   nLive = 0;
   /* n is obtained the same way as nTab; it serves only as the
      loop bound. */
2901f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   n = VG_(sizeXA)( vts_tab );
2902f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   for (i = 0; i < n; i++) {
2903f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      VtsTE* ie = VG_(indexXA)( vts_tab, i );
2904f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      if (ie->vts) {
2905f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj         nLive++;
2906f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj         totrc += (ULong)ie->rc;
2907f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      } else {
2908f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj         tl_assert(ie->rc == 0);
2909f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      }
2910f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   }
2911f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   VG_(printf)("  show_vts_stats %s\n", caller);
2912f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   VG_(printf)("    vts_tab size %4lu\n", nTab);
2913f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   VG_(printf)("    vts_tab live %4lu\n", nLive);
2914f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   VG_(printf)("    vts_set size %4lu\n", nSet);
2915f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   VG_(printf)("        total rc %4llu\n", totrc);
2916f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj}
2917f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
2918ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj
2919ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj/* --- Helpers for VtsID pruning --- */
2920ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj
/* Rewrites *ii from an index into old_tab to the index recorded in
   that old entry's u.remap, and moves one reference along with it:
   the old entry's rc is decremented and the rc of the new_tab entry
   at the remapped index is incremented.  The old rc is not checked
   for being non-zero before the decrement. */
2921ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardjstatic
2922ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardjvoid remap_VtsID ( /*MOD*/XArray* /* of VtsTE */ old_tab,
2923ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj                   /*MOD*/XArray* /* of VtsTE */ new_tab,
2924ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj                   VtsID* ii )
2925ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj{
2926ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj   VtsTE *old_te, *new_te;
2927ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj   VtsID old_id, new_id;
2928ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj   /* We're relying here on VG_(indexXA)'s range checking to assert on
2929ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj      any stupid values, in particular *ii == VtsID_INVALID. */
2930ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj   old_id = *ii;
2931ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj   old_te = VG_(indexXA)( old_tab, old_id );
2932ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj   old_te->rc--;
2933a1ac2f4d9512ce19d63861226652d4d5e5f8955cphilippe   new_id = old_te->u.remap;
2934ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj   new_te = VG_(indexXA)( new_tab, new_id );
2935ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj   new_te->rc++;
2936ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj   *ii = new_id;
2937ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj}
2938ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj
/* Applies remap_VtsID to both VtsIDs packed inside *s.  Only acts
   when SVal__isC(*s) holds: the two ids are extracted with
   SVal__unC_Rmin / SVal__unC_Wmin, each is remapped from old_tab to
   new_tab, and *s is overwritten with SVal__mkC of the remapped
   pair.  Any other *s is left unchanged. */
2939ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardjstatic
2940ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardjvoid remap_VtsIDs_in_SVal ( /*MOD*/XArray* /* of VtsTE */ old_tab,
2941ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj                            /*MOD*/XArray* /* of VtsTE */ new_tab,
2942ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj                            SVal* s )
2943ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj{
2944ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj   SVal old_sv, new_sv;
2945ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj   old_sv = *s;
2946ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj   if (SVal__isC(old_sv)) {
2947ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj      VtsID rMin, wMin;
2948ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj      rMin = SVal__unC_Rmin(old_sv);
2949ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj      wMin = SVal__unC_Wmin(old_sv);
      /* Each call also transfers one reference from the old entry
         to the new one, so the SVal's two references follow it. */
2950ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj      remap_VtsID( old_tab, new_tab, &rMin );
2951ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj      remap_VtsID( old_tab, new_tab, &wMin );
2952ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj      new_sv = SVal__mkC( rMin, wMin );
2953ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj      *s = new_sv;
2954ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj  }
2955ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj}
2956ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj
2957ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj
2958f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj/* NOT TO BE CALLED FROM WITHIN libzsm. */
29598fd92d394106e491df4e771f1b7e7966a550dca9sewardj__attribute__((noinline))
2960f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardjstatic void vts_tab__do_GC ( Bool show_stats )
2961f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj{
2962f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   UWord i, nTab, nLive, nFreed;
2963f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
2964ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj   /* ---------- BEGIN VTS GC ---------- */
2965f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   /* check this is actually necessary. */
2966f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   tl_assert(vts_tab_freelist == VtsID_INVALID);
2967f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
2968f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   /* empty the caches for partial order checks and binary joins.  We
2969f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      could do better and prune out the entries to be deleted, but it
2970f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      ain't worth the hassle. */
2971f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   VtsID__invalidate_caches();
2972f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
2973f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   /* First, make the reference counts up to date. */
2974f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   zsm_flush_cache();
2975f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
2976f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   nTab = VG_(sizeXA)( vts_tab );
2977f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
2978f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   if (show_stats) {
2979f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      VG_(printf)("<<GC begins at vts_tab size %lu>>\n", nTab);
2980f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      show_vts_stats("before GC");
2981f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   }
2982f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
2983ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj   /* Now we can inspect the entire vts_tab.  Any entries with zero
2984ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj      .rc fields are now no longer in use and can be put back on the
2985f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      free list, removed from vts_set, and deleted. */
2986f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   nFreed = 0;
2987f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   for (i = 0; i < nTab; i++) {
2988f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      Bool present;
2989ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj      UWord oldK = 0, oldV = 12345;
2990f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      VtsTE* te = VG_(indexXA)( vts_tab, i );
2991f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      if (te->vts == NULL) {
2992f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj         tl_assert(te->rc == 0);
2993f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj         continue; /* already on the free list (presumably) */
2994f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      }
2995f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      if (te->rc > 0)
2996f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj         continue; /* in use */
2997f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      /* Ok, we got one we can free. */
2998f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      tl_assert(te->vts->id == i);
2999f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      /* first, remove it from vts_set. */
3000f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      present = VG_(delFromFM)( vts_set,
3001f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj                                &oldK, &oldV, (UWord)te->vts );
3002f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      tl_assert(present); /* else it isn't in vts_set ?! */
3003f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      tl_assert(oldV == 0); /* no info stored in vts_set val fields */
3004f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      tl_assert(oldK == (UWord)te->vts); /* else what did delFromFM find?! */
3005f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      /* now free the VTS itself */
3006f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      VTS__delete(te->vts);
3007f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      te->vts = NULL;
3008f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      /* and finally put this entry on the free list */
3009a1ac2f4d9512ce19d63861226652d4d5e5f8955cphilippe      tl_assert(te->u.freelink == VtsID_INVALID); /* can't already be on it */
3010f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      add_to_free_list( i );
3011f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      nFreed++;
3012f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   }
3013f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
3014f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   /* Now figure out when the next GC should be.  We'll allow the
3015f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      number of VTSs to double before GCing again.  Except of course
3016f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      that since we can't (or, at least, don't) shrink vts_tab, we
3017ad4e979f408239dabbaae955d8ffcb84a51a5c85florian      can't set the threshold value smaller than it. */
3018f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   tl_assert(nFreed <= nTab);
3019f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   nLive = nTab - nFreed;
3020f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   tl_assert(nLive >= 0 && nLive <= nTab);
3021f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   vts_next_GC_at = 2 * nLive;
3022f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   if (vts_next_GC_at < nTab)
3023f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      vts_next_GC_at = nTab;
3024f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
3025f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   if (show_stats) {
3026f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      show_vts_stats("after GC");
3027f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      VG_(printf)("<<GC ends, next gc at %ld>>\n", vts_next_GC_at);
3028f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   }
3029f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
30302bd2326e8361a752dfbd4eced9a61b6224f05272philippe   stats__vts_tab_GC++;
30315e2ac3b459ed6b1c8e6978b2ef18d73e1ad9cf14sewardj   if (VG_(clo_stats)) {
3032f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      tl_assert(nTab > 0);
3033d024ae5ca3949fa1fa9bee62825c4e0d2bda31e3sewardj      VG_(message)(Vg_DebugMsg,
3034f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe                   "libhb: VTS GC: #%lu  old size %lu  live %lu  (%2llu%%)\n",
3035f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe                   stats__vts_tab_GC,
3036f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe                   nTab, nLive, (100ULL * (ULong)nLive) / (ULong)nTab);
3037f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   }
3038ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj   /* ---------- END VTS GC ---------- */
3039ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj
3040ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj   /* Decide whether to do VTS pruning.  We have one of three
3041ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj      settings. */
3042ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj   static UInt pruning_auto_ctr = 0; /* do not make non-static */
3043ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj
3044ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj   Bool do_pruning = False;
3045ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj   switch (HG_(clo_vts_pruning)) {
3046ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj      case 0: /* never */
3047ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj         break;
3048ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj      case 1: /* auto */
3049ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj         do_pruning = (++pruning_auto_ctr % 5) == 0;
3050ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj         break;
3051ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj      case 2: /* always */
3052ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj         do_pruning = True;
3053ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj         break;
3054ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj      default:
3055ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj         tl_assert(0);
3056ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj   }
3057ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj
3058ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj   /* The rest of this routine only handles pruning, so we can
3059ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj      quit at this point if it is not to be done. */
3060ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj   if (!do_pruning)
3061ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj      return;
3062c3508652c3a00c0d0035603a7d738f2fe47e9331philippe   /* No need to do pruning if no thread died since the last pruning as
3063c3508652c3a00c0d0035603a7d738f2fe47e9331philippe      no VtsTE can be pruned. */
3064c3508652c3a00c0d0035603a7d738f2fe47e9331philippe   if (VG_(sizeXA)( verydead_thread_table_not_pruned) == 0)
3065c3508652c3a00c0d0035603a7d738f2fe47e9331philippe      return;
3066ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj
3067ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj   /* ---------- BEGIN VTS PRUNING ---------- */
3068c3508652c3a00c0d0035603a7d738f2fe47e9331philippe   /* Sort and check the very dead threads that died since the last pruning.
3069c3508652c3a00c0d0035603a7d738f2fe47e9331philippe      Sorting is used for the check and so that we can quickly look
3070ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj      up the dead-thread entries as we work through the VTSs. */
3071c3508652c3a00c0d0035603a7d738f2fe47e9331philippe   verydead_thread_table_sort_and_check (verydead_thread_table_not_pruned);
3072ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj
3073ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj   /* We will run through the old table, and create a new table and
3074a1ac2f4d9512ce19d63861226652d4d5e5f8955cphilippe      set, at the same time setting the u.remap entries in the old
3075ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj      table to point to the new entries.  Then, visit every VtsID in
3076ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj      the system, and replace all of them with new ones, using the
3077a1ac2f4d9512ce19d63861226652d4d5e5f8955cphilippe      u.remap entries in the old table.  Finally, we can delete the old
3078ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj      table and set. */
3079ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj
3080ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj   XArray* /* of VtsTE */ new_tab
3081ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj      = VG_(newXA)( HG_(zalloc), "libhb.vts_tab__do_GC.new_tab",
3082ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj                    HG_(free), sizeof(VtsTE) );
3083ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj
3084ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj   /* WordFM VTS* void */
3085ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj   WordFM* new_set
3086ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj      = VG_(newFM)( HG_(zalloc), "libhb.vts_tab__do_GC.new_set",
3087ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj                    HG_(free),
3088ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj                    (Word(*)(UWord,UWord))VTS__cmp_structural );
3089ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj
3090ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj   /* Visit each old VTS.  For each one:
3091ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj
3092ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj      * make a pruned version
3093ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj
3094ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj      * search new_set for the pruned version, yielding either
3095ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj        Nothing (not present) or the new VtsID for it.
3096ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj
3097ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj      * if not present, allocate a new VtsID for it, insert (pruned
3098ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj        VTS, new VtsID) in the tree, and set
3099ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj        remap_table[old VtsID] = new VtsID.
3100ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj
3101ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj      * if present, set remap_table[old VtsID] = new VtsID, where
3102ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj        new VtsID was determined by the tree lookup.  Then free up
3103ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj        the clone.
3104ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj   */
3105ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj
3106ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj   UWord nBeforePruning = 0, nAfterPruning = 0;
3107ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj   UWord nSTSsBefore = 0, nSTSsAfter = 0;
3108ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj   VtsID new_VtsID_ctr = 0;
3109ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj
3110ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj   for (i = 0; i < nTab; i++) {
3111ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj
3112ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj      /* For each old VTS .. */
3113ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj      VtsTE* old_te  = VG_(indexXA)( vts_tab, i );
3114ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj      VTS*   old_vts = old_te->vts;
3115ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj
3116ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj      /* Skip it if not in use */
3117ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj      if (old_te->rc == 0) {
3118ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj         tl_assert(old_vts == NULL);
3119ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj         continue;
3120ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj      }
3121a1ac2f4d9512ce19d63861226652d4d5e5f8955cphilippe      tl_assert(old_te->u.remap == VtsID_INVALID);
3122ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj      tl_assert(old_vts != NULL);
3123ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj      tl_assert(old_vts->id == i);
3124ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj      tl_assert(old_vts->ts != NULL);
3125ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj
3126ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj      /* It is in use. Make a pruned version. */
3127ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj      nBeforePruning++;
3128ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj      nSTSsBefore += old_vts->usedTS;
3129ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj      VTS* new_vts = VTS__subtract("libhb.vts_tab__do_GC.new_vts",
3130c3508652c3a00c0d0035603a7d738f2fe47e9331philippe                                   old_vts, verydead_thread_table_not_pruned);
3131ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj      tl_assert(new_vts->sizeTS == new_vts->usedTS);
3132ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj      tl_assert(*(ULong*)(&new_vts->ts[new_vts->usedTS])
3133ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj                == 0x0ddC0ffeeBadF00dULL);
3134ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj
3135ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj      /* Get rid of the old VTS and the tree entry.  It's a bit more
3136ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj         complex to incrementally delete the VTSs now than to nuke
3137ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj         them all after we're done, but the upside is that we don't
3138ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj         wind up temporarily storing potentially two complete copies
3139ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj         of each VTS and hence spiking memory use. */
3140ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj      UWord oldK = 0, oldV = 12345;
3141ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj      Bool  present = VG_(delFromFM)( vts_set,
3142ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj                                      &oldK, &oldV, (UWord)old_vts );
3143ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj      tl_assert(present); /* else it isn't in vts_set ?! */
3144ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj      tl_assert(oldV == 0); /* no info stored in vts_set val fields */
3145ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj      tl_assert(oldK == (UWord)old_vts); /* else what did delFromFM find?! */
3146ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj      /* now free the VTS itself */
3147ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj      VTS__delete(old_vts);
3148ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj      old_te->vts = NULL;
3149ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj      old_vts = NULL;
3150ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj
3151ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj      /* NO MENTIONS of old_vts allowed beyond this point. */
3152ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj
3153ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj      /* Ok, we have the pruned copy in new_vts.  See if a
3154ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj         structurally identical version is already present in new_set.
3155ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj         If so, delete the one we just made and move on; if not, add
3156ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj         it. */
3157ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj      VTS*  identical_version = NULL;
3158ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj      UWord valW = 12345;
3159ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj      if (VG_(lookupFM)(new_set, (UWord*)&identical_version, &valW,
3160ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj                        (UWord)new_vts)) {
3161ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj         // already have it
3162ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj         tl_assert(valW == 0);
3163ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj         tl_assert(identical_version != NULL);
3164ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj         tl_assert(identical_version != new_vts);
3165ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj         VTS__delete(new_vts);
3166ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj         new_vts = identical_version;
3167ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj         tl_assert(new_vts->id != VtsID_INVALID);
3168ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj      } else {
3169ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj         tl_assert(valW == 12345);
3170ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj         tl_assert(identical_version == NULL);
3171ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj         new_vts->id = new_VtsID_ctr++;
3172ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj         Bool b = VG_(addToFM)(new_set, (UWord)new_vts, 0);
3173ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj         tl_assert(!b);
3174ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj         VtsTE new_te;
3175ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj         new_te.vts      = new_vts;
3176ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj         new_te.rc       = 0;
3177a1ac2f4d9512ce19d63861226652d4d5e5f8955cphilippe         new_te.u.freelink = VtsID_INVALID;
3178ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj         Word j = VG_(addToXA)( new_tab, &new_te );
3179ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj         tl_assert(j <= i);
3180ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj         tl_assert(j == new_VtsID_ctr - 1);
3181ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj         // stats
3182ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj         nAfterPruning++;
3183ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj         nSTSsAfter += new_vts->usedTS;
3184ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj      }
3185a1ac2f4d9512ce19d63861226652d4d5e5f8955cphilippe      old_te->u.remap = new_vts->id;
3186ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj
3187ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj   } /* for (i = 0; i < nTab; i++) */
3188ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj
3189c3508652c3a00c0d0035603a7d738f2fe47e9331philippe   /* Move very dead thread from verydead_thread_table_not_pruned to
3190c3508652c3a00c0d0035603a7d738f2fe47e9331philippe      verydead_thread_table. Sort and check verydead_thread_table
3191c3508652c3a00c0d0035603a7d738f2fe47e9331philippe      to verify a thread was reported very dead only once. */
3192c3508652c3a00c0d0035603a7d738f2fe47e9331philippe   {
3193c3508652c3a00c0d0035603a7d738f2fe47e9331philippe      UWord nBT = VG_(sizeXA)( verydead_thread_table_not_pruned);
3194c3508652c3a00c0d0035603a7d738f2fe47e9331philippe
3195c3508652c3a00c0d0035603a7d738f2fe47e9331philippe      for (i = 0; i < nBT; i++) {
3196c3508652c3a00c0d0035603a7d738f2fe47e9331philippe         ThrID thrid =
3197c3508652c3a00c0d0035603a7d738f2fe47e9331philippe            *(ThrID*)VG_(indexXA)( verydead_thread_table_not_pruned, i );
3198c3508652c3a00c0d0035603a7d738f2fe47e9331philippe         VG_(addToXA)( verydead_thread_table, &thrid );
3199c3508652c3a00c0d0035603a7d738f2fe47e9331philippe      }
3200c3508652c3a00c0d0035603a7d738f2fe47e9331philippe      verydead_thread_table_sort_and_check (verydead_thread_table);
3201c3508652c3a00c0d0035603a7d738f2fe47e9331philippe      VG_(dropHeadXA) (verydead_thread_table_not_pruned, nBT);
3202c3508652c3a00c0d0035603a7d738f2fe47e9331philippe   }
3203c3508652c3a00c0d0035603a7d738f2fe47e9331philippe
3204ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj   /* At this point, we have:
3205a1ac2f4d9512ce19d63861226652d4d5e5f8955cphilippe      * the old VTS table, with its u.remap entries set,
3206ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj        and with all .vts == NULL.
3207ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj      * the old VTS tree should be empty, since it and the old VTSs
3208ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj        it contained have been incrementally deleted was we worked
3209ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj        through the old table.
3210a1ac2f4d9512ce19d63861226652d4d5e5f8955cphilippe      * the new VTS table, with all .rc == 0, all u.freelink and u.remap
3211ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj        == VtsID_INVALID.
3212ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj      * the new VTS tree.
3213ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj   */
3214ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj   tl_assert( VG_(sizeFM)(vts_set) == 0 );
3215ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj
3216ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj   /* Now actually apply the mapping. */
3217ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj   /* Visit all the VtsIDs in the entire system.  Where do we expect
3218ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj      to find them?
3219ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj      (a) in shadow memory -- the LineZs and LineFs
3220ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj      (b) in our collection of struct _Thrs.
3221ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj      (c) in our collection of struct _SOs.
3222ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj      Nowhere else, AFAICS.  Not in the zsm cache, because that just
3223ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj      got invalidated.
3224ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj
3225a1ac2f4d9512ce19d63861226652d4d5e5f8955cphilippe      Using the u.remap fields in vts_tab, map each old VtsID to a new
3226ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj      VtsID.  For each old VtsID, dec its rc; and for each new one,
3227ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj      inc it.  This sets up the new refcounts, and it also gives a
3228ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj      cheap sanity check of the old ones: all old refcounts should be
3229ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj      zero after this operation.
3230ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj   */
3231ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj
3232ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj   /* Do the mappings for (a) above: iterate over the Primary shadow
3233ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj      mem map (WordFM Addr SecMap*). */
3234ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj   UWord secmapW = 0;
3235ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj   VG_(initIterFM)( map_shmem );
3236ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj   while (VG_(nextIterFM)( map_shmem, NULL, &secmapW )) {
3237ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj      UWord   j;
3238ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj      SecMap* sm = (SecMap*)secmapW;
3239ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj      tl_assert(sm->magic == SecMap_MAGIC);
3240ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj      /* Deal with the LineZs */
3241ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj      for (i = 0; i < N_SECMAP_ZLINES; i++) {
3242ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj         LineZ* lineZ = &sm->linesZ[i];
324371ed3c9382fa816a84731e9a2ddb9eda7d5624a6philippe         if (lineZ->dict[0] != SVal_INVALID) {
324471ed3c9382fa816a84731e9a2ddb9eda7d5624a6philippe            for (j = 0; j < 4; j++)
324571ed3c9382fa816a84731e9a2ddb9eda7d5624a6philippe               remap_VtsIDs_in_SVal(vts_tab, new_tab, &lineZ->dict[j]);
324671ed3c9382fa816a84731e9a2ddb9eda7d5624a6philippe         } else {
324771ed3c9382fa816a84731e9a2ddb9eda7d5624a6philippe            LineF* lineF = SVal2Ptr (lineZ->dict[1]);
324871ed3c9382fa816a84731e9a2ddb9eda7d5624a6philippe            for (j = 0; j < N_LINE_ARANGE; j++)
324971ed3c9382fa816a84731e9a2ddb9eda7d5624a6philippe               remap_VtsIDs_in_SVal(vts_tab, new_tab, &lineF->w64s[j]);
325071ed3c9382fa816a84731e9a2ddb9eda7d5624a6philippe         }
3251ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj      }
3252ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj   }
3253ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj   VG_(doneIterFM)( map_shmem );
3254ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj
3255ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj   /* Do the mappings for (b) above: visit our collection of struct
3256ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj      _Thrs. */
3257ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj   Thread* hgthread = get_admin_threads();
3258ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj   tl_assert(hgthread);
3259ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj   while (hgthread) {
3260ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj      Thr* hbthr = hgthread->hbthr;
3261ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj      tl_assert(hbthr);
3262ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj      /* Threads that are listed in the prunable set have their viR
3263ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj         and viW set to VtsID_INVALID, so we can't mess with them. */
3264ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj      if (hbthr->llexit_done && hbthr->joinedwith_done) {
3265ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj         tl_assert(hbthr->viR == VtsID_INVALID);
3266ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj         tl_assert(hbthr->viW == VtsID_INVALID);
3267ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj         hgthread = hgthread->admin;
3268ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj         continue;
3269ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj      }
3270ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj      remap_VtsID( vts_tab, new_tab, &hbthr->viR );
3271ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj      remap_VtsID( vts_tab, new_tab, &hbthr->viW );
3272ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj      hgthread = hgthread->admin;
3273ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj   }
3274ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj
3275ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj   /* Do the mappings for (c) above: visit the struct _SOs. */
3276ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj   SO* so = admin_SO;
3277ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj   while (so) {
3278ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj      if (so->viR != VtsID_INVALID)
3279ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj         remap_VtsID( vts_tab, new_tab, &so->viR );
3280ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj      if (so->viW != VtsID_INVALID)
3281ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj         remap_VtsID( vts_tab, new_tab, &so->viW );
3282ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj      so = so->admin_next;
3283ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj   }
3284ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj
3285ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj   /* So, we're nearly done (with this incredibly complex operation).
3286ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj      Check the refcounts for the old VtsIDs all fell to zero, as
3287ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj      expected.  Any failure is serious. */
3288ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj   for (i = 0; i < nTab; i++) {
3289ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj      VtsTE* te = VG_(indexXA)( vts_tab, i );
3290ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj      tl_assert(te->vts == NULL);
3291ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj      /* This is the assert proper.  Note we're also asserting
3292a1ac2f4d9512ce19d63861226652d4d5e5f8955cphilippe         zeroness for old entries which are unmapped.  That's OK. */
3293ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj      tl_assert(te->rc == 0);
3294ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj   }
3295ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj
3296ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj   /* Install the new table and set. */
3297ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj   VG_(deleteFM)(vts_set, NULL/*kFin*/, NULL/*vFin*/);
3298ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj   vts_set = new_set;
3299ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj   VG_(deleteXA)( vts_tab );
3300ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj   vts_tab = new_tab;
3301ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj
3302ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj   /* The freelist of vts_tab entries is empty now, because we've
3303ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj      compacted all of the live entries at the low end of the
3304ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj      table. */
3305ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj   vts_tab_freelist = VtsID_INVALID;
3306ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj
3307ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj   /* Sanity check vts_set and vts_tab. */
3308ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj
3309ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj   /* Because all the live entries got slid down to the bottom of vts_tab: */
3310ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj   tl_assert( VG_(sizeXA)( vts_tab ) == VG_(sizeFM)( vts_set ));
3311ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj
3312ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj   /* Assert that the vts_tab and vts_set entries point at each other
3313ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj      in the required way */
3314ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj   UWord wordK = 0, wordV = 0;
3315ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj   VG_(initIterFM)( vts_set );
3316ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj   while (VG_(nextIterFM)( vts_set, &wordK, &wordV )) {
3317ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj      tl_assert(wordK != 0);
3318ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj      tl_assert(wordV == 0);
3319ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj      VTS* vts = (VTS*)wordK;
3320ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj      tl_assert(vts->id != VtsID_INVALID);
3321ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj      VtsTE* te = VG_(indexXA)( vts_tab, vts->id );
3322ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj      tl_assert(te->vts == vts);
3323ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj   }
3324ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj   VG_(doneIterFM)( vts_set );
3325ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj
3326ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj   /* Also iterate over the table, and check each entry is
3327ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj      plausible. */
3328ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj   nTab = VG_(sizeXA)( vts_tab );
3329ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj   for (i = 0; i < nTab; i++) {
3330ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj      VtsTE* te = VG_(indexXA)( vts_tab, i );
3331ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj      tl_assert(te->vts);
3332ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj      tl_assert(te->vts->id == i);
3333ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj      tl_assert(te->rc > 0); /* 'cos we just GC'd */
3334a1ac2f4d9512ce19d63861226652d4d5e5f8955cphilippe      tl_assert(te->u.freelink == VtsID_INVALID); /* in use */
3335a1ac2f4d9512ce19d63861226652d4d5e5f8955cphilippe      /* value of te->u.remap  not relevant */
3336ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj   }
3337ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj
3338ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj   /* And we're done.  Bwahahaha. Ha. Ha. Ha. */
33392bd2326e8361a752dfbd4eced9a61b6224f05272philippe   stats__vts_pruning++;
3340ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj   if (VG_(clo_stats)) {
3341ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj      tl_assert(nTab > 0);
3342ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj      VG_(message)(
3343ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj         Vg_DebugMsg,
33442bd2326e8361a752dfbd4eced9a61b6224f05272philippe         "libhb: VTS PR: #%lu  before %lu (avg sz %lu)  "
3345ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj            "after %lu (avg sz %lu)\n",
33462bd2326e8361a752dfbd4eced9a61b6224f05272philippe         stats__vts_pruning,
3347ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj         nBeforePruning, nSTSsBefore / (nBeforePruning ? nBeforePruning : 1),
3348ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj         nAfterPruning, nSTSsAfter / (nAfterPruning ? nAfterPruning : 1)
3349ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj      );
3350ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj   }
3351ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj   /* ---------- END VTS PRUNING ---------- */
3352f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj}
3353f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
3354f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
3355f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj/////////////////////////////////////////////////////////
3356f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj//                                                     //
3357f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj// Vts IDs                                             //
3358f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj//                                                     //
3359f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj/////////////////////////////////////////////////////////
3360f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
3361f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj//////////////////////////
/* NOTE(review): this pointer starts out NULL; the code that allocates
   the VTS it refers to is not visible in this part of the file, so
   confirm it is set up before any of the operations named below run. */
33627aa38a97a2a49163e465a7c85b8b9469f5c7d8e2sewardj/* A temporary, max-sized VTS which is used as a temporary (the first
33637aa38a97a2a49163e465a7c85b8b9469f5c7d8e2sewardj   argument) in VTS__singleton, VTS__tick and VTS__join operations. */
33647aa38a97a2a49163e465a7c85b8b9469f5c7d8e2sewardjstatic VTS* temp_max_sized_VTS = NULL;
33657aa38a97a2a49163e465a7c85b8b9469f5c7d8e2sewardj
33667aa38a97a2a49163e465a7c85b8b9469f5c7d8e2sewardj//////////////////////////
/* Four file-local ULong counters, all starting at zero.
   NOTE(review): going by the names, the "queries" counters tally
   cmpLEQ / join2 lookups and the "misses" counters tally the lookups
   that were not satisfied by the corresponding cache declared further
   down -- the code that increments them is outside this chunk, so
   confirm before relying on that reading. */
336723f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardjstatic ULong stats__cmpLEQ_queries = 0;
336823f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardjstatic ULong stats__cmpLEQ_misses  = 0;
336923f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardjstatic ULong stats__join2_queries  = 0;
337023f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardjstatic ULong stats__join2_misses   = 0;
3371f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
3372f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardjstatic inline UInt ROL32 ( UInt w, Int n ) {
3373f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   w = (w << n) | (w >> (32-n));
3374f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   return w;
3375f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj}
3376f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardjstatic inline UInt hash_VtsIDs ( VtsID vi1, VtsID vi2, UInt nTab ) {
3377f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   UInt hash = ROL32(vi1,19) ^ ROL32(vi2,13);
3378f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   return hash % nTab;
3379f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj}
3380f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
338123f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj#define N_CMPLEQ_CACHE 1023
3382f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardjstatic
338323f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj   struct { VtsID vi1; VtsID vi2; Bool leq; }
338423f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj   cmpLEQ_cache[N_CMPLEQ_CACHE];
3385f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
3386f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj#define N_JOIN2_CACHE 1023
3387f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardjstatic
3388f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   struct { VtsID vi1; VtsID vi2; VtsID res; }
3389f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   join2_cache[N_JOIN2_CACHE];
3390f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
3391f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardjstatic void VtsID__invalidate_caches ( void ) {
3392f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   Int i;
339323f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj   for (i = 0; i < N_CMPLEQ_CACHE; i++) {
339423f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj      cmpLEQ_cache[i].vi1 = VtsID_INVALID;
339523f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj      cmpLEQ_cache[i].vi2 = VtsID_INVALID;
339623f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj      cmpLEQ_cache[i].leq = False;
3397f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   }
3398f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   for (i = 0; i < N_JOIN2_CACHE; i++) {
3399f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj     join2_cache[i].vi1 = VtsID_INVALID;
3400f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj     join2_cache[i].vi2 = VtsID_INVALID;
3401f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj     join2_cache[i].res = VtsID_INVALID;
3402f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   }
3403f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj}
3404f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj//////////////////////////
3405f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
3406d52392dd8f0536931cb0bbf2a27f1621704f32ebsewardj//static Bool VtsID__is_valid ( VtsID vi ) {
3407d52392dd8f0536931cb0bbf2a27f1621704f32ebsewardj//   VtsTE* ve;
3408d52392dd8f0536931cb0bbf2a27f1621704f32ebsewardj//   if (vi >= (VtsID)VG_(sizeXA)( vts_tab ))
3409d52392dd8f0536931cb0bbf2a27f1621704f32ebsewardj//      return False;
3410d52392dd8f0536931cb0bbf2a27f1621704f32ebsewardj//   ve = VG_(indexXA)( vts_tab, vi );
3411d52392dd8f0536931cb0bbf2a27f1621704f32ebsewardj//   if (!ve->vts)
3412d52392dd8f0536931cb0bbf2a27f1621704f32ebsewardj//      return False;
3413d52392dd8f0536931cb0bbf2a27f1621704f32ebsewardj//   tl_assert(ve->vts->id == vi);
3414d52392dd8f0536931cb0bbf2a27f1621704f32ebsewardj//   return True;
3415d52392dd8f0536931cb0bbf2a27f1621704f32ebsewardj//}
3416f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
3417f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardjstatic VTS* VtsID__to_VTS ( VtsID vi ) {
3418f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   VtsTE* te = VG_(indexXA)( vts_tab, vi );
3419f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   tl_assert(te->vts);
3420f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   return te->vts;
3421f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj}
3422f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
3423f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardjstatic void VtsID__pp ( VtsID vi ) {
3424f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   VTS* vts = VtsID__to_VTS(vi);
3425b28fe8941785b44e63dfedd843e8107ee5a7cf2eflorian   VTS__show( vts );
3426f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj}
3427f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
3428f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj/* compute partial ordering relation of vi1 and vi2. */
3429f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj__attribute__((noinline))
343023f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardjstatic Bool VtsID__cmpLEQ_WRK ( VtsID vi1, VtsID vi2 ) {
3431f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   UInt hash;
343223f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj   Bool leq;
3433f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   VTS  *v1, *v2;
343423f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj   //if (vi1 == vi2) return True;
3435f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   tl_assert(vi1 != vi2);
3436f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   ////++
343723f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj   stats__cmpLEQ_queries++;
343823f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj   hash = hash_VtsIDs(vi1, vi2, N_CMPLEQ_CACHE);
343923f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj   if (cmpLEQ_cache[hash].vi1 == vi1
344023f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj       && cmpLEQ_cache[hash].vi2 == vi2)
344123f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj      return cmpLEQ_cache[hash].leq;
344223f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj   stats__cmpLEQ_misses++;
3443f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   ////--
3444f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   v1  = VtsID__to_VTS(vi1);
3445f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   v2  = VtsID__to_VTS(vi2);
3446e4cce74e68f351f1dd78ba7a5a412966f8d7702csewardj   leq = VTS__cmpLEQ( v1, v2 ) == 0;
3447f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   ////++
344823f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj   cmpLEQ_cache[hash].vi1 = vi1;
344923f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj   cmpLEQ_cache[hash].vi2 = vi2;
345023f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj   cmpLEQ_cache[hash].leq = leq;
3451f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   ////--
345223f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj   return leq;
3453f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj}
345423f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardjstatic inline Bool VtsID__cmpLEQ ( VtsID vi1, VtsID vi2 ) {
345523f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj   return LIKELY(vi1 == vi2)  ? True  : VtsID__cmpLEQ_WRK(vi1, vi2);
3456f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj}
3457f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
3458f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj/* compute binary join */
3459f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj__attribute__((noinline))
3460f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardjstatic VtsID VtsID__join2_WRK ( VtsID vi1, VtsID vi2 ) {
3461f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   UInt  hash;
3462f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   VtsID res;
34637aa38a97a2a49163e465a7c85b8b9469f5c7d8e2sewardj   VTS   *vts1, *vts2;
3464f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   //if (vi1 == vi2) return vi1;
3465f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   tl_assert(vi1 != vi2);
3466f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   ////++
3467f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   stats__join2_queries++;
3468f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   hash = hash_VtsIDs(vi1, vi2, N_JOIN2_CACHE);
3469f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   if (join2_cache[hash].vi1 == vi1
3470f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj       && join2_cache[hash].vi2 == vi2)
3471f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      return join2_cache[hash].res;
3472f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   stats__join2_misses++;
3473f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   ////--
3474f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   vts1 = VtsID__to_VTS(vi1);
3475f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   vts2 = VtsID__to_VTS(vi2);
34767aa38a97a2a49163e465a7c85b8b9469f5c7d8e2sewardj   temp_max_sized_VTS->usedTS = 0;
34777aa38a97a2a49163e465a7c85b8b9469f5c7d8e2sewardj   VTS__join(temp_max_sized_VTS, vts1,vts2);
34787aa38a97a2a49163e465a7c85b8b9469f5c7d8e2sewardj   res = vts_tab__find__or__clone_and_add(temp_max_sized_VTS);
3479f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   ////++
3480f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   join2_cache[hash].vi1 = vi1;
3481f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   join2_cache[hash].vi2 = vi2;
3482f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   join2_cache[hash].res = res;
3483f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   ////--
3484f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   return res;
3485f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj}
3486f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardjstatic inline VtsID VtsID__join2 ( VtsID vi1, VtsID vi2 ) {
34871c0ce7a9edf8772773f6216dbad4bb04317d23b6sewardj   return LIKELY(vi1 == vi2)  ? vi1  : VtsID__join2_WRK(vi1, vi2);
3488f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj}
3489f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
3490f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj/* create a singleton VTS, namely [thr:1] */
3491f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardjstatic VtsID VtsID__mk_Singleton ( Thr* thr, ULong tym ) {
34927aa38a97a2a49163e465a7c85b8b9469f5c7d8e2sewardj   temp_max_sized_VTS->usedTS = 0;
34937aa38a97a2a49163e465a7c85b8b9469f5c7d8e2sewardj   VTS__singleton(temp_max_sized_VTS, thr,tym);
34947aa38a97a2a49163e465a7c85b8b9469f5c7d8e2sewardj   return vts_tab__find__or__clone_and_add(temp_max_sized_VTS);
3495f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj}
3496f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
3497f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj/* tick operation, creates value 1 if specified index is absent */
3498f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardjstatic VtsID VtsID__tick ( VtsID vi, Thr* idx ) {
3499f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   VTS* vts = VtsID__to_VTS(vi);
35007aa38a97a2a49163e465a7c85b8b9469f5c7d8e2sewardj   temp_max_sized_VTS->usedTS = 0;
35017aa38a97a2a49163e465a7c85b8b9469f5c7d8e2sewardj   VTS__tick(temp_max_sized_VTS, idx,vts);
35027aa38a97a2a49163e465a7c85b8b9469f5c7d8e2sewardj   return vts_tab__find__or__clone_and_add(temp_max_sized_VTS);
3503f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj}
3504f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
3505f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj/* index into a VTS (only for assertions) */
3506f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardjstatic ULong VtsID__indexAt ( VtsID vi, Thr* idx ) {
3507f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   VTS* vts = VtsID__to_VTS(vi);
3508f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   return VTS__indexAt_SLOW( vts, idx );
3509f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj}
3510f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
351123f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj/* Assuming that !cmpLEQ(vi1, vi2), find the index of the first (or
351223f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj   any, really) element in vi1 which is pointwise greater-than the
351323f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj   corresponding element in vi2.  If no such element exists, return
351423f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj   NULL.  This needs to be fairly quick since it is called every time
351523f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj   a race is detected. */
351623f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardjstatic Thr* VtsID__findFirst_notLEQ ( VtsID vi1, VtsID vi2 )
351723f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj{
351823f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj   VTS  *vts1, *vts2;
3519e4cce74e68f351f1dd78ba7a5a412966f8d7702csewardj   Thr*  diffthr;
3520e4cce74e68f351f1dd78ba7a5a412966f8d7702csewardj   ThrID diffthrid;
352123f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj   tl_assert(vi1 != vi2);
352223f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj   vts1 = VtsID__to_VTS(vi1);
352323f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj   vts2 = VtsID__to_VTS(vi2);
352423f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj   tl_assert(vts1 != vts2);
3525e4cce74e68f351f1dd78ba7a5a412966f8d7702csewardj   diffthrid = VTS__cmpLEQ(vts1, vts2);
3526e4cce74e68f351f1dd78ba7a5a412966f8d7702csewardj   diffthr = Thr__from_ThrID(diffthrid);
352723f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj   tl_assert(diffthr); /* else they are LEQ ! */
352823f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj   return diffthr;
352923f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj}
353023f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj
353123f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj
353223f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj/////////////////////////////////////////////////////////
353323f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj//                                                     //
353423f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj// Filters                                             //
353523f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj//                                                     //
353623f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj/////////////////////////////////////////////////////////
353723f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj
353823f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj/* Forget everything we know -- clear the filter and let everything
353923f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj   through.  This needs to be as fast as possible, since it is called
354023f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj   every time the running thread changes, and every time a thread's
354123f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj   vector clocks change, which can be quite frequent.  The obvious
354223f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj   fast way to do this is simply to stuff in tags which we know are
354323f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj   not going to match anything, since they're not aligned to the start
354423f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj   of a line. */
35456bd9dc18c043927c1196caba20a327238a179c42florianstatic void Filter__clear ( Filter* fi, const HChar* who )
354623f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj{
354723f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj   UWord i;
354823f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj   if (0) VG_(printf)("  Filter__clear(%p, %s)\n", fi, who);
354923f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj   for (i = 0; i < FI_NUM_LINES; i += 8) {
355023f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj      fi->tags[i+0] = 1; /* impossible value -- cannot match */
355123f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj      fi->tags[i+1] = 1;
355223f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj      fi->tags[i+2] = 1;
355323f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj      fi->tags[i+3] = 1;
355423f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj      fi->tags[i+4] = 1;
355523f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj      fi->tags[i+5] = 1;
355623f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj      fi->tags[i+6] = 1;
355723f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj      fi->tags[i+7] = 1;
355823f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj   }
355923f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj   tl_assert(i == FI_NUM_LINES);
356023f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj}
356123f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj
356223f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj/* Clearing an arbitrary range in the filter.  Unfortunately
356323f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj   we have to do this due to core-supplied new/die-mem events. */
356423f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj
356523f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardjstatic void Filter__clear_1byte ( Filter* fi, Addr a )
356623f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj{
356723f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj   Addr    atag   = FI_GET_TAG(a);     /* tag of 'a' */
356823f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj   UWord   lineno = FI_GET_LINENO(a);  /* lineno for 'a' */
356923f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj   FiLine* line   = &fi->lines[lineno];
357023f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj   UWord   loff   = (a - atag) / 8;
357123f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj   UShort  mask   = 0x3 << (2 * (a & 7));
357223f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj   /* mask is C000, 3000, 0C00, 0300, 00C0, 0030, 000C or 0003 */
357323f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj   if (LIKELY( fi->tags[lineno] == atag )) {
357423f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj      /* hit.  clear the bits. */
357523f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj      UShort  u16  = line->u16s[loff];
357623f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj      line->u16s[loff] = u16 & ~mask; /* clear them */
357723f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj   } else {
357823f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj      /* miss.  The filter doesn't hold this address, so ignore. */
357923f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj   }
358023f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj}
358123f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj
358223f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardjstatic void Filter__clear_8bytes_aligned ( Filter* fi, Addr a )
358323f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj{
358423f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj   Addr    atag   = FI_GET_TAG(a);     /* tag of 'a' */
358523f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj   UWord   lineno = FI_GET_LINENO(a);  /* lineno for 'a' */
358623f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj   FiLine* line   = &fi->lines[lineno];
358723f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj   UWord   loff   = (a - atag) / 8;
358823f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj   if (LIKELY( fi->tags[lineno] == atag )) {
358923f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj      line->u16s[loff] = 0;
359023f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj   } else {
359123f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj    /* miss.  The filter doesn't hold this address, so ignore. */
359223f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj   }
359323f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj}
359423f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj
3595fc00a2a1f86ba77f905c8ee34b3c8c949bb9c107philippe/* Only used to verify the fast Filter__clear_range */
3596fc00a2a1f86ba77f905c8ee34b3c8c949bb9c107philippe__attribute__((unused))
3597fc00a2a1f86ba77f905c8ee34b3c8c949bb9c107philippestatic void Filter__clear_range_SLOW ( Filter* fi, Addr a, UWord len )
359823f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj{
3599fc00a2a1f86ba77f905c8ee34b3c8c949bb9c107philippe   tl_assert (CHECK_ZSM);
3600fc00a2a1f86ba77f905c8ee34b3c8c949bb9c107philippe
360123f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj   /* slowly do part preceding 8-alignment */
360223f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj   while (UNLIKELY(!VG_IS_8_ALIGNED(a)) && LIKELY(len > 0)) {
360323f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj      Filter__clear_1byte( fi, a );
360423f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj      a++;
360523f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj      len--;
360623f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj   }
360723f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj   /* vector loop */
360823f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj   while (len >= 8) {
360923f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj      Filter__clear_8bytes_aligned( fi, a );
361023f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj      a += 8;
361123f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj      len -= 8;
361223f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj   }
361323f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj   /* slowly do tail */
361423f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj   while (UNLIKELY(len > 0)) {
361523f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj      Filter__clear_1byte( fi, a );
361623f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj      a++;
361723f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj      len--;
361823f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj   }
361923f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj}
362023f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj
3621fc00a2a1f86ba77f905c8ee34b3c8c949bb9c107philippestatic void Filter__clear_range ( Filter* fi, Addr a, UWord len )
3622fc00a2a1f86ba77f905c8ee34b3c8c949bb9c107philippe{
3623fc00a2a1f86ba77f905c8ee34b3c8c949bb9c107philippe#  if CHECK_ZSM > 0
3624fc00a2a1f86ba77f905c8ee34b3c8c949bb9c107philippe   /* We check the below more complex algorithm with the simple one.
3625fc00a2a1f86ba77f905c8ee34b3c8c949bb9c107philippe      This check is very expensive : we do first the slow way on a
3626fc00a2a1f86ba77f905c8ee34b3c8c949bb9c107philippe      copy of the data, then do it the fast way. On RETURN, we check
3627fc00a2a1f86ba77f905c8ee34b3c8c949bb9c107philippe      the two values are equal. */
3628fc00a2a1f86ba77f905c8ee34b3c8c949bb9c107philippe   Filter fi_check = *fi;
3629fc00a2a1f86ba77f905c8ee34b3c8c949bb9c107philippe   Filter__clear_range_SLOW(&fi_check, a, len);
3630fc00a2a1f86ba77f905c8ee34b3c8c949bb9c107philippe#  define RETURN goto check_and_return
3631fc00a2a1f86ba77f905c8ee34b3c8c949bb9c107philippe#  else
3632fc00a2a1f86ba77f905c8ee34b3c8c949bb9c107philippe#  define RETURN return
3633fc00a2a1f86ba77f905c8ee34b3c8c949bb9c107philippe#  endif
3634fc00a2a1f86ba77f905c8ee34b3c8c949bb9c107philippe
3635fc00a2a1f86ba77f905c8ee34b3c8c949bb9c107philippe   Addr    begtag = FI_GET_TAG(a);       /* tag of range begin */
3636fc00a2a1f86ba77f905c8ee34b3c8c949bb9c107philippe
3637fc00a2a1f86ba77f905c8ee34b3c8c949bb9c107philippe   Addr    end = a + len - 1;
3638fc00a2a1f86ba77f905c8ee34b3c8c949bb9c107philippe   Addr    endtag = FI_GET_TAG(end); /* tag of range end. */
3639fc00a2a1f86ba77f905c8ee34b3c8c949bb9c107philippe
3640fc00a2a1f86ba77f905c8ee34b3c8c949bb9c107philippe   UWord rlen = len; /* remaining length to clear */
3641fc00a2a1f86ba77f905c8ee34b3c8c949bb9c107philippe
3642fc00a2a1f86ba77f905c8ee34b3c8c949bb9c107philippe   Addr    c = a; /* Current position we are clearing. */
3643fc00a2a1f86ba77f905c8ee34b3c8c949bb9c107philippe   UWord   clineno = FI_GET_LINENO(c); /* Current lineno we are clearing */
3644fc00a2a1f86ba77f905c8ee34b3c8c949bb9c107philippe   FiLine* cline; /* Current line we are clearing */
3645fc00a2a1f86ba77f905c8ee34b3c8c949bb9c107philippe   UWord   cloff; /* Current offset in line we are clearing, when clearing
3646fc00a2a1f86ba77f905c8ee34b3c8c949bb9c107philippe                     partial lines. */
3647fc00a2a1f86ba77f905c8ee34b3c8c949bb9c107philippe
3648fc00a2a1f86ba77f905c8ee34b3c8c949bb9c107philippe   UShort u16;
3649fc00a2a1f86ba77f905c8ee34b3c8c949bb9c107philippe
3650fc00a2a1f86ba77f905c8ee34b3c8c949bb9c107philippe   STATIC_ASSERT (FI_LINE_SZB == 32);
3651fc00a2a1f86ba77f905c8ee34b3c8c949bb9c107philippe   // Below assumes filter lines are 32 bytes
3652fc00a2a1f86ba77f905c8ee34b3c8c949bb9c107philippe
3653fc00a2a1f86ba77f905c8ee34b3c8c949bb9c107philippe   if (LIKELY(fi->tags[clineno] == begtag)) {
3654fc00a2a1f86ba77f905c8ee34b3c8c949bb9c107philippe      /* LIKELY for the heavy caller VG_(unknown_SP_update). */
3655fc00a2a1f86ba77f905c8ee34b3c8c949bb9c107philippe      /* First filter line matches begtag.
3656fc00a2a1f86ba77f905c8ee34b3c8c949bb9c107philippe         If c is not at the filter line begin, the below will clear
3657fc00a2a1f86ba77f905c8ee34b3c8c949bb9c107philippe         the filter line bytes starting from c. */
3658fc00a2a1f86ba77f905c8ee34b3c8c949bb9c107philippe      cline = &fi->lines[clineno];
3659fc00a2a1f86ba77f905c8ee34b3c8c949bb9c107philippe      cloff = (c - begtag) / 8;
3660fc00a2a1f86ba77f905c8ee34b3c8c949bb9c107philippe
3661fc00a2a1f86ba77f905c8ee34b3c8c949bb9c107philippe      /* First the byte(s) needed to reach 8-alignment */
3662fc00a2a1f86ba77f905c8ee34b3c8c949bb9c107philippe      if (UNLIKELY(!VG_IS_8_ALIGNED(c))) {
3663fc00a2a1f86ba77f905c8ee34b3c8c949bb9c107philippe         /* hiB is the nr of bytes (higher addresses) from c to reach
3664fc00a2a1f86ba77f905c8ee34b3c8c949bb9c107philippe            8-aligment. */
3665fc00a2a1f86ba77f905c8ee34b3c8c949bb9c107philippe         UWord hiB = 8 - (c & 7);
3666fc00a2a1f86ba77f905c8ee34b3c8c949bb9c107philippe         /* Compute 2-bit/byte mask representing hiB bytes [c..c+hiB[
3667fc00a2a1f86ba77f905c8ee34b3c8c949bb9c107philippe            mask is  C000 , F000, FC00, FF00, FFC0, FFF0 or FFFC for the byte
3668fc00a2a1f86ba77f905c8ee34b3c8c949bb9c107philippe            range    7..7   6..7  5..7  4..7  3..7  2..7    1..7 */
3669fc00a2a1f86ba77f905c8ee34b3c8c949bb9c107philippe         UShort mask = 0xFFFF << (16 - 2*hiB);
3670fc00a2a1f86ba77f905c8ee34b3c8c949bb9c107philippe
3671fc00a2a1f86ba77f905c8ee34b3c8c949bb9c107philippe         u16  = cline->u16s[cloff];
3672fc00a2a1f86ba77f905c8ee34b3c8c949bb9c107philippe         if (LIKELY(rlen >= hiB)) {
3673fc00a2a1f86ba77f905c8ee34b3c8c949bb9c107philippe            cline->u16s[cloff] = u16 & ~mask; /* clear all hiB from c */
3674fc00a2a1f86ba77f905c8ee34b3c8c949bb9c107philippe            rlen -= hiB;
3675fc00a2a1f86ba77f905c8ee34b3c8c949bb9c107philippe            c += hiB;
3676fc00a2a1f86ba77f905c8ee34b3c8c949bb9c107philippe            cloff += 1;
3677fc00a2a1f86ba77f905c8ee34b3c8c949bb9c107philippe         } else {
3678fc00a2a1f86ba77f905c8ee34b3c8c949bb9c107philippe            /* Only have the bits for rlen bytes bytes. */
3679fc00a2a1f86ba77f905c8ee34b3c8c949bb9c107philippe            mask = mask & ~(0xFFFF << (16 - 2*(hiB-rlen)));
3680fc00a2a1f86ba77f905c8ee34b3c8c949bb9c107philippe            cline->u16s[cloff] = u16 & ~mask; /* clear rlen bytes from c. */
3681fc00a2a1f86ba77f905c8ee34b3c8c949bb9c107philippe            RETURN;  // We have cleared all what we can.
3682fc00a2a1f86ba77f905c8ee34b3c8c949bb9c107philippe         }
3683fc00a2a1f86ba77f905c8ee34b3c8c949bb9c107philippe      }
3684fc00a2a1f86ba77f905c8ee34b3c8c949bb9c107philippe      /* c is now 8 aligned. Clear by 8 aligned bytes,
3685fc00a2a1f86ba77f905c8ee34b3c8c949bb9c107philippe         till c is filter-line aligned */
3686fc00a2a1f86ba77f905c8ee34b3c8c949bb9c107philippe      while (!VG_IS_32_ALIGNED(c) && rlen >= 8) {
3687fc00a2a1f86ba77f905c8ee34b3c8c949bb9c107philippe         cline->u16s[cloff] = 0;
3688fc00a2a1f86ba77f905c8ee34b3c8c949bb9c107philippe         c += 8;
3689fc00a2a1f86ba77f905c8ee34b3c8c949bb9c107philippe         rlen -= 8;
3690fc00a2a1f86ba77f905c8ee34b3c8c949bb9c107philippe         cloff += 1;
3691fc00a2a1f86ba77f905c8ee34b3c8c949bb9c107philippe      }
3692fc00a2a1f86ba77f905c8ee34b3c8c949bb9c107philippe   } else {
3693fc00a2a1f86ba77f905c8ee34b3c8c949bb9c107philippe      c = begtag + FI_LINE_SZB;
3694fc00a2a1f86ba77f905c8ee34b3c8c949bb9c107philippe      if (c > end)
3695fc00a2a1f86ba77f905c8ee34b3c8c949bb9c107philippe         RETURN;   // We have cleared all what we can.
3696fc00a2a1f86ba77f905c8ee34b3c8c949bb9c107philippe      rlen -= c - a;
3697fc00a2a1f86ba77f905c8ee34b3c8c949bb9c107philippe   }
3698fc00a2a1f86ba77f905c8ee34b3c8c949bb9c107philippe   // We have changed c, so re-establish clineno.
3699fc00a2a1f86ba77f905c8ee34b3c8c949bb9c107philippe   clineno = FI_GET_LINENO(c);
3700fc00a2a1f86ba77f905c8ee34b3c8c949bb9c107philippe
3701fc00a2a1f86ba77f905c8ee34b3c8c949bb9c107philippe   if (rlen >= FI_LINE_SZB) {
3702fc00a2a1f86ba77f905c8ee34b3c8c949bb9c107philippe      /* Here, c is filter line-aligned. Clear all full lines that
3703fc00a2a1f86ba77f905c8ee34b3c8c949bb9c107philippe         overlap with the range starting at c, made of a full lines */
3704fc00a2a1f86ba77f905c8ee34b3c8c949bb9c107philippe      UWord nfull = rlen / FI_LINE_SZB;
3705fc00a2a1f86ba77f905c8ee34b3c8c949bb9c107philippe      UWord full_len = nfull * FI_LINE_SZB;
3706fc00a2a1f86ba77f905c8ee34b3c8c949bb9c107philippe      rlen -= full_len;
3707fc00a2a1f86ba77f905c8ee34b3c8c949bb9c107philippe      if (nfull > FI_NUM_LINES)
3708fc00a2a1f86ba77f905c8ee34b3c8c949bb9c107philippe         nfull = FI_NUM_LINES; // no need to check several times the same entry.
3709fc00a2a1f86ba77f905c8ee34b3c8c949bb9c107philippe
3710fc00a2a1f86ba77f905c8ee34b3c8c949bb9c107philippe      for (UWord n = 0; n < nfull; n++) {
3711fc00a2a1f86ba77f905c8ee34b3c8c949bb9c107philippe         if (UNLIKELY(address_in_range(fi->tags[clineno], c, full_len))) {
3712fc00a2a1f86ba77f905c8ee34b3c8c949bb9c107philippe            cline = &fi->lines[clineno];
3713fc00a2a1f86ba77f905c8ee34b3c8c949bb9c107philippe            cline->u16s[0] = 0;
3714fc00a2a1f86ba77f905c8ee34b3c8c949bb9c107philippe            cline->u16s[1] = 0;
3715fc00a2a1f86ba77f905c8ee34b3c8c949bb9c107philippe            cline->u16s[2] = 0;
3716fc00a2a1f86ba77f905c8ee34b3c8c949bb9c107philippe            cline->u16s[3] = 0;
3717fc00a2a1f86ba77f905c8ee34b3c8c949bb9c107philippe            STATIC_ASSERT (4 == sizeof(cline->u16s)/sizeof(cline->u16s[0]));
3718fc00a2a1f86ba77f905c8ee34b3c8c949bb9c107philippe         }
3719fc00a2a1f86ba77f905c8ee34b3c8c949bb9c107philippe         clineno++;
3720fc00a2a1f86ba77f905c8ee34b3c8c949bb9c107philippe         if (UNLIKELY(clineno == FI_NUM_LINES))
3721fc00a2a1f86ba77f905c8ee34b3c8c949bb9c107philippe            clineno = 0;
3722fc00a2a1f86ba77f905c8ee34b3c8c949bb9c107philippe      }
3723fc00a2a1f86ba77f905c8ee34b3c8c949bb9c107philippe
3724fc00a2a1f86ba77f905c8ee34b3c8c949bb9c107philippe      c += full_len;
3725fc00a2a1f86ba77f905c8ee34b3c8c949bb9c107philippe      clineno = FI_GET_LINENO(c);
3726fc00a2a1f86ba77f905c8ee34b3c8c949bb9c107philippe   }
3727fc00a2a1f86ba77f905c8ee34b3c8c949bb9c107philippe
3728fc00a2a1f86ba77f905c8ee34b3c8c949bb9c107philippe   if (CHECK_ZSM) {
3729fc00a2a1f86ba77f905c8ee34b3c8c949bb9c107philippe      tl_assert(VG_IS_8_ALIGNED(c));
3730fc00a2a1f86ba77f905c8ee34b3c8c949bb9c107philippe      tl_assert(clineno == FI_GET_LINENO(c));
3731fc00a2a1f86ba77f905c8ee34b3c8c949bb9c107philippe   }
3732fc00a2a1f86ba77f905c8ee34b3c8c949bb9c107philippe
3733fc00a2a1f86ba77f905c8ee34b3c8c949bb9c107philippe   /* Do the last filter line, if it was not cleared as a full filter line */
3734fc00a2a1f86ba77f905c8ee34b3c8c949bb9c107philippe   if (UNLIKELY(rlen > 0) && fi->tags[clineno] == endtag) {
3735fc00a2a1f86ba77f905c8ee34b3c8c949bb9c107philippe      cline = &fi->lines[clineno];
3736fc00a2a1f86ba77f905c8ee34b3c8c949bb9c107philippe      cloff = (c - endtag) / 8;
3737fc00a2a1f86ba77f905c8ee34b3c8c949bb9c107philippe      if (CHECK_ZSM) tl_assert(FI_GET_TAG(c) == endtag);
3738fc00a2a1f86ba77f905c8ee34b3c8c949bb9c107philippe
3739fc00a2a1f86ba77f905c8ee34b3c8c949bb9c107philippe      /* c is 8 aligned. Clear by 8 aligned bytes, till we have less than
3740fc00a2a1f86ba77f905c8ee34b3c8c949bb9c107philippe         8 bytes. */
3741fc00a2a1f86ba77f905c8ee34b3c8c949bb9c107philippe      while (rlen >= 8) {
3742fc00a2a1f86ba77f905c8ee34b3c8c949bb9c107philippe         cline->u16s[cloff] = 0;
3743fc00a2a1f86ba77f905c8ee34b3c8c949bb9c107philippe         c += 8;
3744fc00a2a1f86ba77f905c8ee34b3c8c949bb9c107philippe         rlen -= 8;
3745fc00a2a1f86ba77f905c8ee34b3c8c949bb9c107philippe         cloff += 1;
3746fc00a2a1f86ba77f905c8ee34b3c8c949bb9c107philippe      }
3747fc00a2a1f86ba77f905c8ee34b3c8c949bb9c107philippe      /* Then the remaining byte(s) */
3748fc00a2a1f86ba77f905c8ee34b3c8c949bb9c107philippe      if (rlen > 0) {
3749fc00a2a1f86ba77f905c8ee34b3c8c949bb9c107philippe         /* nr of bytes from c to reach end. */
3750fc00a2a1f86ba77f905c8ee34b3c8c949bb9c107philippe         UWord loB = rlen;
3751fc00a2a1f86ba77f905c8ee34b3c8c949bb9c107philippe         /* Compute mask representing loB bytes [c..c+loB[ :
3752fc00a2a1f86ba77f905c8ee34b3c8c949bb9c107philippe            mask is 0003, 000F, 003F, 00FF, 03FF, 0FFF or 3FFF */
3753fc00a2a1f86ba77f905c8ee34b3c8c949bb9c107philippe         UShort mask = 0xFFFF >> (16 - 2*loB);
3754fc00a2a1f86ba77f905c8ee34b3c8c949bb9c107philippe
3755fc00a2a1f86ba77f905c8ee34b3c8c949bb9c107philippe         u16  = cline->u16s[cloff];
3756fc00a2a1f86ba77f905c8ee34b3c8c949bb9c107philippe         cline->u16s[cloff] = u16 & ~mask; /* clear all loB from c */
3757fc00a2a1f86ba77f905c8ee34b3c8c949bb9c107philippe      }
3758fc00a2a1f86ba77f905c8ee34b3c8c949bb9c107philippe   }
3759fc00a2a1f86ba77f905c8ee34b3c8c949bb9c107philippe
3760fc00a2a1f86ba77f905c8ee34b3c8c949bb9c107philippe#  if CHECK_ZSM > 0
3761fc00a2a1f86ba77f905c8ee34b3c8c949bb9c107philippe   check_and_return:
3762fc00a2a1f86ba77f905c8ee34b3c8c949bb9c107philippe   tl_assert (VG_(memcmp)(&fi_check, fi, sizeof(fi_check)) == 0);
3763fc00a2a1f86ba77f905c8ee34b3c8c949bb9c107philippe#  endif
3764fc00a2a1f86ba77f905c8ee34b3c8c949bb9c107philippe#  undef RETURN
3765fc00a2a1f86ba77f905c8ee34b3c8c949bb9c107philippe}
376623f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj
376723f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj/* ------ Read handlers for the filter. ------ */
376823f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj
376923f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardjstatic inline Bool Filter__ok_to_skip_crd64 ( Filter* fi, Addr a )
377023f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj{
377123f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj   if (UNLIKELY( !VG_IS_8_ALIGNED(a) ))
377223f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj      return False;
377323f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj   {
377423f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj     Addr    atag   = FI_GET_TAG(a);     /* tag of 'a' */
377523f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj     UWord   lineno = FI_GET_LINENO(a);  /* lineno for 'a' */
377623f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj     FiLine* line   = &fi->lines[lineno];
377723f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj     UWord   loff   = (a - atag) / 8;
377823f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj     UShort  mask   = 0xAAAA;
377923f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj     if (LIKELY( fi->tags[lineno] == atag )) {
378023f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj        /* hit.  check line and update. */
378123f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj        UShort u16  = line->u16s[loff];
378223f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj        Bool   ok   = (u16 & mask) == mask; /* all R bits set? */
378323f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj        line->u16s[loff] = u16 | mask; /* set them */
378423f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj        return ok;
378523f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj     } else {
378623f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj        /* miss.  nuke existing line and re-use it. */
378723f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj        UWord i;
378823f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj        fi->tags[lineno] = atag;
378923f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj        for (i = 0; i < FI_LINE_SZB / 8; i++)
379023f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj           line->u16s[i] = 0;
379123f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj        line->u16s[loff] = mask;
379223f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj        return False;
379323f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj     }
379423f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj   }
379523f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj}
379623f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj
379723f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardjstatic inline Bool Filter__ok_to_skip_crd32 ( Filter* fi, Addr a )
379823f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj{
379923f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj   if (UNLIKELY( !VG_IS_4_ALIGNED(a) ))
380023f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj      return False;
380123f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj   {
380223f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj     Addr    atag   = FI_GET_TAG(a);     /* tag of 'a' */
380323f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj     UWord   lineno = FI_GET_LINENO(a);  /* lineno for 'a' */
380423f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj     FiLine* line   = &fi->lines[lineno];
380523f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj     UWord   loff   = (a - atag) / 8;
380623f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj     UShort  mask   = 0xAA << (2 * (a & 4)); /* 0xAA00 or 0x00AA */
380723f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj     if (LIKELY( fi->tags[lineno] == atag )) {
380823f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj        /* hit.  check line and update. */
380923f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj        UShort  u16  = line->u16s[loff];
381023f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj        Bool    ok   = (u16 & mask) == mask; /* 4 x R bits set? */
381123f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj        line->u16s[loff] = u16 | mask; /* set them */
381223f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj        return ok;
381323f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj     } else {
381423f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj        /* miss.  nuke existing line and re-use it. */
381523f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj        UWord   i;
381623f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj        fi->tags[lineno] = atag;
381723f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj        for (i = 0; i < FI_LINE_SZB / 8; i++)
381823f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj           line->u16s[i] = 0;
381923f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj        line->u16s[loff] = mask;
382023f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj        return False;
382123f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj     }
382223f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj   }
382323f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj}
382423f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj
382523f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardjstatic inline Bool Filter__ok_to_skip_crd16 ( Filter* fi, Addr a )
382623f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj{
382723f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj   if (UNLIKELY( !VG_IS_2_ALIGNED(a) ))
382823f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj      return False;
382923f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj   {
383023f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj     Addr    atag   = FI_GET_TAG(a);     /* tag of 'a' */
383123f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj     UWord   lineno = FI_GET_LINENO(a);  /* lineno for 'a' */
383223f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj     FiLine* line   = &fi->lines[lineno];
383323f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj     UWord   loff   = (a - atag) / 8;
383423f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj     UShort  mask   = 0xA << (2 * (a & 6));
383523f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj     /* mask is A000, 0A00, 00A0 or 000A */
383623f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj     if (LIKELY( fi->tags[lineno] == atag )) {
383723f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj        /* hit.  check line and update. */
383823f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj        UShort  u16  = line->u16s[loff];
383923f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj        Bool    ok   = (u16 & mask) == mask; /* 2 x R bits set? */
384023f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj        line->u16s[loff] = u16 | mask; /* set them */
384123f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj        return ok;
384223f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj     } else {
384323f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj        /* miss.  nuke existing line and re-use it. */
384423f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj        UWord   i;
384523f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj        fi->tags[lineno] = atag;
384623f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj        for (i = 0; i < FI_LINE_SZB / 8; i++)
384723f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj           line->u16s[i] = 0;
384823f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj        line->u16s[loff] = mask;
384923f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj        return False;
385023f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj     }
385123f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj   }
385223f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj}
385323f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj
385423f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardjstatic inline Bool Filter__ok_to_skip_crd08 ( Filter* fi, Addr a )
385523f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj{
385623f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj   {
385723f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj     Addr    atag   = FI_GET_TAG(a);     /* tag of 'a' */
385823f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj     UWord   lineno = FI_GET_LINENO(a);  /* lineno for 'a' */
385923f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj     FiLine* line   = &fi->lines[lineno];
386023f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj     UWord   loff   = (a - atag) / 8;
386123f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj     UShort  mask   = 0x2 << (2 * (a & 7));
386223f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj     /* mask is 8000, 2000, 0800, 0200, 0080, 0020, 0008 or 0002 */
386323f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj     if (LIKELY( fi->tags[lineno] == atag )) {
386423f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj        /* hit.  check line and update. */
386523f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj        UShort  u16  = line->u16s[loff];
386623f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj        Bool    ok   = (u16 & mask) == mask; /* 1 x R bits set? */
386723f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj        line->u16s[loff] = u16 | mask; /* set them */
386823f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj        return ok;
386923f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj     } else {
387023f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj        /* miss.  nuke existing line and re-use it. */
387123f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj        UWord   i;
387223f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj        fi->tags[lineno] = atag;
387323f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj        for (i = 0; i < FI_LINE_SZB / 8; i++)
387423f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj           line->u16s[i] = 0;
387523f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj        line->u16s[loff] = mask;
387623f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj        return False;
387723f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj     }
387823f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj   }
387923f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj}
388023f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj
388123f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj
388223f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj/* ------ Write handlers for the filter. ------ */
388323f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj
388423f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardjstatic inline Bool Filter__ok_to_skip_cwr64 ( Filter* fi, Addr a )
388523f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj{
388623f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj   if (UNLIKELY( !VG_IS_8_ALIGNED(a) ))
388723f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj      return False;
388823f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj   {
388923f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj     Addr    atag   = FI_GET_TAG(a);     /* tag of 'a' */
389023f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj     UWord   lineno = FI_GET_LINENO(a);  /* lineno for 'a' */
389123f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj     FiLine* line   = &fi->lines[lineno];
389223f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj     UWord   loff   = (a - atag) / 8;
389323f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj     UShort  mask   = 0xFFFF;
389423f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj     if (LIKELY( fi->tags[lineno] == atag )) {
389523f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj        /* hit.  check line and update. */
389623f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj        UShort u16  = line->u16s[loff];
389723f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj        Bool   ok   = (u16 & mask) == mask; /* all R & W bits set? */
389823f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj        line->u16s[loff] = u16 | mask; /* set them */
389923f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj        return ok;
390023f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj     } else {
390123f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj        /* miss.  nuke existing line and re-use it. */
390223f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj        UWord i;
390323f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj        fi->tags[lineno] = atag;
390423f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj        for (i = 0; i < FI_LINE_SZB / 8; i++)
390523f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj           line->u16s[i] = 0;
390623f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj        line->u16s[loff] = mask;
390723f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj        return False;
390823f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj     }
390923f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj   }
391023f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj}
391123f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj
391223f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardjstatic inline Bool Filter__ok_to_skip_cwr32 ( Filter* fi, Addr a )
391323f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj{
391423f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj   if (UNLIKELY( !VG_IS_4_ALIGNED(a) ))
391523f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj      return False;
391623f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj   {
391723f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj     Addr    atag   = FI_GET_TAG(a);     /* tag of 'a' */
391823f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj     UWord   lineno = FI_GET_LINENO(a);  /* lineno for 'a' */
391923f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj     FiLine* line   = &fi->lines[lineno];
392023f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj     UWord   loff   = (a - atag) / 8;
392123f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj     UShort  mask   = 0xFF << (2 * (a & 4)); /* 0xFF00 or 0x00FF */
392223f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj     if (LIKELY( fi->tags[lineno] == atag )) {
392323f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj        /* hit.  check line and update. */
392423f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj        UShort  u16  = line->u16s[loff];
392523f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj        Bool    ok   = (u16 & mask) == mask; /* 4 x R & W bits set? */
392623f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj        line->u16s[loff] = u16 | mask; /* set them */
392723f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj        return ok;
392823f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj     } else {
392923f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj        /* miss.  nuke existing line and re-use it. */
393023f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj        UWord   i;
393123f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj        fi->tags[lineno] = atag;
393223f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj        for (i = 0; i < FI_LINE_SZB / 8; i++)
393323f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj           line->u16s[i] = 0;
393423f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj        line->u16s[loff] = mask;
393523f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj        return False;
393623f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj     }
393723f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj   }
393823f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj}
393923f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj
394023f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardjstatic inline Bool Filter__ok_to_skip_cwr16 ( Filter* fi, Addr a )
394123f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj{
394223f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj   if (UNLIKELY( !VG_IS_2_ALIGNED(a) ))
394323f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj      return False;
394423f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj   {
394523f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj     Addr    atag   = FI_GET_TAG(a);     /* tag of 'a' */
394623f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj     UWord   lineno = FI_GET_LINENO(a);  /* lineno for 'a' */
394723f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj     FiLine* line   = &fi->lines[lineno];
394823f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj     UWord   loff   = (a - atag) / 8;
394923f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj     UShort  mask   = 0xF << (2 * (a & 6));
395023f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj     /* mask is F000, 0F00, 00F0 or 000F */
395123f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj     if (LIKELY( fi->tags[lineno] == atag )) {
395223f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj        /* hit.  check line and update. */
395323f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj        UShort  u16  = line->u16s[loff];
395423f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj        Bool    ok   = (u16 & mask) == mask; /* 2 x R & W bits set? */
395523f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj        line->u16s[loff] = u16 | mask; /* set them */
395623f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj        return ok;
395723f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj     } else {
395823f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj        /* miss.  nuke existing line and re-use it. */
395923f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj        UWord   i;
396023f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj        fi->tags[lineno] = atag;
396123f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj        for (i = 0; i < FI_LINE_SZB / 8; i++)
396223f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj           line->u16s[i] = 0;
396323f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj        line->u16s[loff] = mask;
396423f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj        return False;
396523f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj     }
396623f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj   }
396723f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj}
396823f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj
396923f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardjstatic inline Bool Filter__ok_to_skip_cwr08 ( Filter* fi, Addr a )
397023f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj{
397123f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj   {
397223f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj     Addr    atag   = FI_GET_TAG(a);     /* tag of 'a' */
397323f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj     UWord   lineno = FI_GET_LINENO(a);  /* lineno for 'a' */
397423f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj     FiLine* line   = &fi->lines[lineno];
397523f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj     UWord   loff   = (a - atag) / 8;
397623f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj     UShort  mask   = 0x3 << (2 * (a & 7));
397723f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj     /* mask is C000, 3000, 0C00, 0300, 00C0, 0030, 000C or 0003 */
397823f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj     if (LIKELY( fi->tags[lineno] == atag )) {
397923f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj        /* hit.  check line and update. */
398023f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj        UShort  u16  = line->u16s[loff];
398123f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj        Bool    ok   = (u16 & mask) == mask; /* 1 x R bits set? */
398223f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj        line->u16s[loff] = u16 | mask; /* set them */
398323f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj        return ok;
398423f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj     } else {
398523f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj        /* miss.  nuke existing line and re-use it. */
398623f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj        UWord   i;
398723f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj        fi->tags[lineno] = atag;
398823f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj        for (i = 0; i < FI_LINE_SZB / 8; i++)
398923f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj           line->u16s[i] = 0;
399023f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj        line->u16s[loff] = mask;
399123f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj        return False;
399223f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj     }
399323f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj   }
399423f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj}
399523f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj
3996f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
3997f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj/////////////////////////////////////////////////////////
3998f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj//                                                     //
3999f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj// Threads                                             //
4000f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj//                                                     //
4001f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj/////////////////////////////////////////////////////////
4002f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
/* Maps ThrID values to their Thr*s (which contain ThrID values that
   should point back to the relevant slot in the array).  Lowest
   numbered slot (0) is for thrid = 1024, (1) is for 1025, etc.
   Created lazily by Thr__new on first use. */
static XArray* /* of Thr* */ thrid_to_thr_map = NULL;

/* And a counter to dole out ThrID values.  For rationale/background,
   see comments on definition of ScalarTS (far) above. */
static ThrID thrid_counter = 1024; /* runs up to ThrID_MAX_VALID */
4011e4cce74e68f351f1dd78ba7a5a412966f8d7702csewardj
4012e4cce74e68f351f1dd78ba7a5a412966f8d7702csewardjstatic ThrID Thr__to_ThrID ( Thr* thr ) {
4013e4cce74e68f351f1dd78ba7a5a412966f8d7702csewardj   return thr->thrid;
4014e4cce74e68f351f1dd78ba7a5a412966f8d7702csewardj}
4015e4cce74e68f351f1dd78ba7a5a412966f8d7702csewardjstatic Thr* Thr__from_ThrID ( UInt thrid ) {
4016e4cce74e68f351f1dd78ba7a5a412966f8d7702csewardj   Thr* thr = *(Thr**)VG_(indexXA)( thrid_to_thr_map, thrid - 1024 );
4017e4cce74e68f351f1dd78ba7a5a412966f8d7702csewardj   tl_assert(thr->thrid == thrid);
4018e4cce74e68f351f1dd78ba7a5a412966f8d7702csewardj   return thr;
4019e4cce74e68f351f1dd78ba7a5a412966f8d7702csewardj}
4020e4cce74e68f351f1dd78ba7a5a412966f8d7702csewardj
4021e4cce74e68f351f1dd78ba7a5a412966f8d7702csewardjstatic Thr* Thr__new ( void )
4022e4cce74e68f351f1dd78ba7a5a412966f8d7702csewardj{
4023f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   Thr* thr = HG_(zalloc)( "libhb.Thr__new.1", sizeof(Thr) );
4024f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   thr->viR = VtsID_INVALID;
4025f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   thr->viW = VtsID_INVALID;
4026ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj   thr->llexit_done = False;
4027ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj   thr->joinedwith_done = False;
402823f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj   thr->filter = HG_(zalloc)( "libhb.Thr__new.2", sizeof(Filter) );
4029ca903bb1b0400000f08a549f091ea193804f850cphilippe   if (HG_(clo_history_level) == 1)
4030ca903bb1b0400000f08a549f091ea193804f850cphilippe      thr->local_Kws_n_stacks
4031ca903bb1b0400000f08a549f091ea193804f850cphilippe         = VG_(newXA)( HG_(zalloc),
4032ca903bb1b0400000f08a549f091ea193804f850cphilippe                       "libhb.Thr__new.3 (local_Kws_and_stacks)",
4033ca903bb1b0400000f08a549f091ea193804f850cphilippe                       HG_(free), sizeof(ULong_n_EC) );
4034e4cce74e68f351f1dd78ba7a5a412966f8d7702csewardj
4035e4cce74e68f351f1dd78ba7a5a412966f8d7702csewardj   /* Add this Thr* <-> ThrID binding to the mapping, and
4036e4cce74e68f351f1dd78ba7a5a412966f8d7702csewardj      cross-check */
4037e4cce74e68f351f1dd78ba7a5a412966f8d7702csewardj   if (!thrid_to_thr_map) {
4038e4cce74e68f351f1dd78ba7a5a412966f8d7702csewardj      thrid_to_thr_map = VG_(newXA)( HG_(zalloc), "libhb.Thr__new.4",
4039e4cce74e68f351f1dd78ba7a5a412966f8d7702csewardj                                     HG_(free), sizeof(Thr*) );
4040e4cce74e68f351f1dd78ba7a5a412966f8d7702csewardj   }
4041e4cce74e68f351f1dd78ba7a5a412966f8d7702csewardj
40427aa38a97a2a49163e465a7c85b8b9469f5c7d8e2sewardj   if (thrid_counter >= ThrID_MAX_VALID) {
4043e4cce74e68f351f1dd78ba7a5a412966f8d7702csewardj      /* We're hosed.  We have to stop. */
4044e4cce74e68f351f1dd78ba7a5a412966f8d7702csewardj      scalarts_limitations_fail_NORETURN( True/*due_to_nThrs*/ );
4045e4cce74e68f351f1dd78ba7a5a412966f8d7702csewardj   }
4046e4cce74e68f351f1dd78ba7a5a412966f8d7702csewardj
4047e4cce74e68f351f1dd78ba7a5a412966f8d7702csewardj   thr->thrid = thrid_counter++;
4048e4cce74e68f351f1dd78ba7a5a412966f8d7702csewardj   Word ix = VG_(addToXA)( thrid_to_thr_map, &thr );
4049e4cce74e68f351f1dd78ba7a5a412966f8d7702csewardj   tl_assert(ix + 1024 == thr->thrid);
4050e4cce74e68f351f1dd78ba7a5a412966f8d7702csewardj
4051f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   return thr;
4052f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj}
4053f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
40548ab2c1303922dd85467e00cb7b5594d73b7043c6sewardjstatic void note_local_Kw_n_stack_for ( Thr* thr )
405523f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj{
405623f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj   Word       nPresent;
405723f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj   ULong_n_EC pair;
405823f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj   tl_assert(thr);
4059b712617d02b247b374d1744a530f61fae1780a36sewardj
4060b712617d02b247b374d1744a530f61fae1780a36sewardj   // We only collect this info at history level 1 (approx)
4061b712617d02b247b374d1744a530f61fae1780a36sewardj   if (HG_(clo_history_level) != 1)
4062b712617d02b247b374d1744a530f61fae1780a36sewardj      return;
4063b712617d02b247b374d1744a530f61fae1780a36sewardj
40648ab2c1303922dd85467e00cb7b5594d73b7043c6sewardj   /* This is the scalar Kw for thr. */
40658ab2c1303922dd85467e00cb7b5594d73b7043c6sewardj   pair.ull = VtsID__indexAt( thr->viW, thr );
406623f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj   pair.ec  = main_get_EC( thr );
406723f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj   tl_assert(pair.ec);
40688ab2c1303922dd85467e00cb7b5594d73b7043c6sewardj   tl_assert(thr->local_Kws_n_stacks);
406923f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj
407023f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj   /* check that we're not adding duplicates */
40718ab2c1303922dd85467e00cb7b5594d73b7043c6sewardj   nPresent = VG_(sizeXA)( thr->local_Kws_n_stacks );
407223f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj
407323f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj   /* Throw away old stacks, if necessary.  We can't accumulate stuff
407423f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj      indefinitely. */
40758ab2c1303922dd85467e00cb7b5594d73b7043c6sewardj   if (nPresent >= N_KWs_N_STACKs_PER_THREAD) {
40768ab2c1303922dd85467e00cb7b5594d73b7043c6sewardj      VG_(dropHeadXA)( thr->local_Kws_n_stacks, nPresent / 2 );
40778ab2c1303922dd85467e00cb7b5594d73b7043c6sewardj      nPresent = VG_(sizeXA)( thr->local_Kws_n_stacks );
40788ab2c1303922dd85467e00cb7b5594d73b7043c6sewardj      if (0)
40798ab2c1303922dd85467e00cb7b5594d73b7043c6sewardj         VG_(printf)("LOCAL Kw: thr %p,  Kw %llu,  ec %p (!!! gc !!!)\n",
408023f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj                     thr, pair.ull, pair.ec );
408123f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj   }
408223f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj
408323f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj   if (nPresent > 0) {
408423f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj      ULong_n_EC* prevPair
40858ab2c1303922dd85467e00cb7b5594d73b7043c6sewardj         = (ULong_n_EC*)VG_(indexXA)( thr->local_Kws_n_stacks, nPresent-1 );
40868ab2c1303922dd85467e00cb7b5594d73b7043c6sewardj      tl_assert( prevPair->ull <= pair.ull );
408723f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj   }
408823f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj
408923f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj   if (nPresent == 0)
409023f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj      pair.ec = NULL;
409123f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj
40928ab2c1303922dd85467e00cb7b5594d73b7043c6sewardj   VG_(addToXA)( thr->local_Kws_n_stacks, &pair );
409323f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj
409423f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj   if (0)
40958ab2c1303922dd85467e00cb7b5594d73b7043c6sewardj      VG_(printf)("LOCAL Kw: thr %p,  Kw %llu,  ec %p\n",
409623f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj                  thr, pair.ull, pair.ec );
409723f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj   if (0)
409823f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj      VG_(pp_ExeContext)(pair.ec);
409923f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj}
410023f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj
41016bd9dc18c043927c1196caba20a327238a179c42florianstatic Int cmp__ULong_n_EC__by_ULong ( const ULong_n_EC* pair1,
41026bd9dc18c043927c1196caba20a327238a179c42florian                                       const ULong_n_EC* pair2 )
410323f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj{
410423f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj   if (pair1->ull < pair2->ull) return -1;
410523f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj   if (pair1->ull > pair2->ull) return 1;
410623f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj   return 0;
410723f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj}
410823f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj
4109f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
4110f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj/////////////////////////////////////////////////////////
4111f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj//                                                     //
4112f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj// Shadow Values                                       //
4113f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj//                                                     //
4114f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj/////////////////////////////////////////////////////////
4115f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
4116f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj// type SVal, SVal_INVALID and SVal_NOACCESS are defined by
4117f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj// hb_zsm.h.  We have to do everything else here.
4118f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
/* SVal is 64 bit unsigned int.

      <---------30--------->    <---------30--------->
   00 X-----Rmin-VtsID-----X 00 X-----Wmin-VtsID-----X   C(Rmin,Wmin)
   10 X--------------------X XX X--------------------X   A: SVal_NOACCESS
   11 0--------------------0 00 0--------------------0   A: SVal_INVALID

   The leftmost two bits (bits 63:62) are the tag; SVAL_TAGMASK
   selects them.
*/
#define SVAL_TAGMASK (3ULL << 62)
4128f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
4129f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardjstatic inline Bool SVal__isC ( SVal s ) {
4130f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   return (0ULL << 62) == (s & SVAL_TAGMASK);
4131f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj}
4132f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardjstatic inline SVal SVal__mkC ( VtsID rmini, VtsID wmini ) {
4133f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   //tl_assert(VtsID__is_valid(rmini));
4134f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   //tl_assert(VtsID__is_valid(wmini));
4135f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   return (((ULong)rmini) << 32) | ((ULong)wmini);
4136f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj}
4137f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardjstatic inline VtsID SVal__unC_Rmin ( SVal s ) {
4138f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   tl_assert(SVal__isC(s));
4139f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   return (VtsID)(s >> 32);
4140f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj}
4141f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardjstatic inline VtsID SVal__unC_Wmin ( SVal s ) {
4142f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   tl_assert(SVal__isC(s));
4143f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   return (VtsID)(s & 0xFFFFFFFFULL);
4144f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj}
4145f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
414623f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardjstatic inline Bool SVal__isA ( SVal s ) {
4147f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   return (2ULL << 62) == (s & SVAL_TAGMASK);
4148f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj}
41495aa09bf5e5e9ab4cd24689f80937a9268b7aaebasewardj__attribute__((unused))
415023f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardjstatic inline SVal SVal__mkA ( void ) {
4151f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   return 2ULL << 62;
4152f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj}
4153f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
4154f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj/* Direct callback from lib_zsm. */
41551475a7ffc4f60e78d402ba2572f1624cebbbb76aphilippestatic inline void SVal__rcinc ( SVal s ) {
4156f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   if (SVal__isC(s)) {
4157f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      VtsID__rcinc( SVal__unC_Rmin(s) );
4158f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      VtsID__rcinc( SVal__unC_Wmin(s) );
4159f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   }
4160f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj}
4161f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
4162f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj/* Direct callback from lib_zsm. */
41631475a7ffc4f60e78d402ba2572f1624cebbbb76aphilippestatic inline void SVal__rcdec ( SVal s ) {
4164f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   if (SVal__isC(s)) {
4165f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      VtsID__rcdec( SVal__unC_Rmin(s) );
4166f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      VtsID__rcdec( SVal__unC_Wmin(s) );
4167f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   }
4168f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj}
4169f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
417071ed3c9382fa816a84731e9a2ddb9eda7d5624a6philippestatic inline void *SVal2Ptr (SVal s)
417171ed3c9382fa816a84731e9a2ddb9eda7d5624a6philippe{
417271ed3c9382fa816a84731e9a2ddb9eda7d5624a6philippe   return (void*)(UWord)s;
417371ed3c9382fa816a84731e9a2ddb9eda7d5624a6philippe}
417471ed3c9382fa816a84731e9a2ddb9eda7d5624a6philippe
417571ed3c9382fa816a84731e9a2ddb9eda7d5624a6philippestatic inline SVal Ptr2SVal (void* ptr)
417671ed3c9382fa816a84731e9a2ddb9eda7d5624a6philippe{
417771ed3c9382fa816a84731e9a2ddb9eda7d5624a6philippe   return (SVal)(UWord)ptr;
417871ed3c9382fa816a84731e9a2ddb9eda7d5624a6philippe}
417971ed3c9382fa816a84731e9a2ddb9eda7d5624a6philippe
418071ed3c9382fa816a84731e9a2ddb9eda7d5624a6philippe
4181f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
4182f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj/////////////////////////////////////////////////////////
4183f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj//                                                     //
4184f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj// Change-event map2                                   //
4185f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj//                                                     //
4186f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj/////////////////////////////////////////////////////////
4187f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
4188f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj/* This is in two parts:
4189f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
419023f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj   1. A hash table of RCECs.  This is a set of reference-counted stack
4191f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      traces.  When the reference count of a stack trace becomes zero,
4192f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      it is removed from the set and freed up.  The intent is to have
4193f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      a set of stack traces which can be referred to from (2), but to
4194f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      only represent each one once.  The set is indexed/searched by
4195f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      ordering on the stack trace vectors.
4196f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
4197328d6627c26471332610da3f5a0b9cc3cdd410c7philippe   2. A Hash table of OldRefs.  These store information about each old
4198328d6627c26471332610da3f5a0b9cc3cdd410c7philippe      ref that we need to record.  Hash table key is the address of the
4199f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      location for which the information is recorded.  For LRU
4200328d6627c26471332610da3f5a0b9cc3cdd410c7philippe      purposes, each OldRef in the hash table is also on a doubly
4201cabdbb5cab3740c7082e44b770a582c8186888e9philippe      linked list maintaining the order in which the OldRef were most
4202cabdbb5cab3740c7082e44b770a582c8186888e9philippe      recently accessed.
4203328d6627c26471332610da3f5a0b9cc3cdd410c7philippe      Each OldRef also maintains the stamp at which it was last accessed.
4204328d6627c26471332610da3f5a0b9cc3cdd410c7philippe      With these stamps, we can quickly check which of 2 OldRef is the
4205328d6627c26471332610da3f5a0b9cc3cdd410c7philippe      'newest', without having to scan the full list of LRU OldRef.
4206f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
4207328d6627c26471332610da3f5a0b9cc3cdd410c7philippe      The important part of an OldRef is, however, its acc component.
4208328d6627c26471332610da3f5a0b9cc3cdd410c7philippe      This binds a TSW triple (thread, size, R/W) to an RCEC.
4209849b0ed71673805c5bdc3e44b1743a3d2c1b513dsewardj
4210cabdbb5cab3740c7082e44b770a582c8186888e9philippe      We allocate a maximum of HG_(clo_conflict_cache_size) OldRef.
4211cabdbb5cab3740c7082e44b770a582c8186888e9philippe      Then we do exact LRU discarding.  For each discarded OldRef we must
4212328d6627c26471332610da3f5a0b9cc3cdd410c7philippe      of course decrement the reference count on the RCEC it
4213f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      refers to, in order that entries from (1) eventually get
4214f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      discarded too.
4215f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj*/
4216f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
/* Statistics counters for the change-event map.  All start at zero.
   The trailing notes describe only the updates that are made by the
   RCEC hash-table code directly below; counters without a note are
   maintained elsewhere in the file. */
4217a4b20c089072bdaf83938044ec077a6a89622481philippestatic UWord stats__evm__lookup_found = 0;
4218a4b20c089072bdaf83938044ec077a6a89622481philippestatic UWord stats__evm__lookup_notfound = 0;
4219f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
4220328d6627c26471332610da3f5a0b9cc3cdd410c7philippestatic UWord stats__ctxt_eq_tsw_eq_rcec = 0;
4221328d6627c26471332610da3f5a0b9cc3cdd410c7philippestatic UWord stats__ctxt_eq_tsw_neq_rcec = 0;
4222328d6627c26471332610da3f5a0b9cc3cdd410c7philippestatic UWord stats__ctxt_neq_tsw_neq_rcec = 0;
4223f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardjstatic UWord stats__ctxt_rcdec_calls = 0; /* # calls to ctxt__rcdec */
4224328d6627c26471332610da3f5a0b9cc3cdd410c7philippestatic UWord stats__ctxt_rcec_gc_discards = 0;
4225f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
4226f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardjstatic UWord stats__ctxt_tab_curr = 0; /* bumped per RCEC inserted by ctxt__find_or_add */
4227f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardjstatic UWord stats__ctxt_tab_max  = 0; /* high-water mark of stats__ctxt_tab_curr */
4228f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
4229f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardjstatic UWord stats__ctxt_tab_qs   = 0; /* # ctxt__find_or_add queries */
4230f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardjstatic UWord stats__ctxt_tab_cmps = 0; /* # RCEC comparisons made by those queries */
4231f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
4232f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
4233f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj///////////////////////////////////////////////////////
4234111544ac7149cc64bb85a0346e8b4187569579besewardj//// Part (1): A hash table of RCECs
4235f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj///
4236f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
/* Number of stack frames kept per RCEC: the size of RCEC.frames and
   the depth requested from main_get_stacktrace in get_RCEC. */
4237f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj#define N_FRAMES 8
4238f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
/* Value stored in RCEC.magic and checked by the assertions in the
   RCEC routines. */
4239f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj// (UInt) `echo "Reference Counted Execution Context" | md5sum`
4240f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj#define RCEC_MAGIC 0xab88abb2UL
4241f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
/* Modulus used by ctxt__find_or_add to turn RCEC.frames_hash into a
   contextTab bucket index. */
4242f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj//#define N_RCEC_TAB 98317 /* prime */
4243f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj#define N_RCEC_TAB 196613 /* prime */
4244f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
/* A reference-counted execution context: a fixed-depth stack trace
   plus the bookkeeping needed to keep it in the contextTab hash
   table. */
4245f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardjtypedef
4246f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   struct _RCEC {
4247d86e3a28694ca5a7c59881475b8d1f6c9ac285b0sewardj      UWord magic;  /* sanity check only; always RCEC_MAGIC */
4248f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      struct _RCEC* next; /* next RCEC in the same contextTab bucket */
4249f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      UWord rc; /* reference count; see ctxt__rcinc / ctxt__rcdec */
4250f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      UWord rcX; /* used for crosschecking */
42516c83d5e38fa00ce5c1532b274b4e61834ec1db48njn      UWord frames_hash;          /* hash of all the frames (computed in get_RCEC) */
42526c83d5e38fa00ce5c1532b274b4e61834ec1db48njn      UWord frames[N_FRAMES]; /* the stack trace, as filled in by main_get_stacktrace */
4253f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   }
4254f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   RCEC;
4255f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
4256cabdbb5cab3740c7082e44b770a582c8186888e9philippe//////////// BEGIN RCEC pool allocator
4257cabdbb5cab3740c7082e44b770a582c8186888e9philippestatic PoolAlloc* rcec_pool_allocator;
4258cabdbb5cab3740c7082e44b770a582c8186888e9philippestatic RCEC* alloc_RCEC ( void ) {
4259cabdbb5cab3740c7082e44b770a582c8186888e9philippe   return VG_(allocEltPA) ( rcec_pool_allocator );
4260cabdbb5cab3740c7082e44b770a582c8186888e9philippe}
4261cabdbb5cab3740c7082e44b770a582c8186888e9philippe
4262cabdbb5cab3740c7082e44b770a582c8186888e9philippestatic void free_RCEC ( RCEC* rcec ) {
4263cabdbb5cab3740c7082e44b770a582c8186888e9philippe   tl_assert(rcec->magic == RCEC_MAGIC);
4264cabdbb5cab3740c7082e44b770a582c8186888e9philippe   VG_(freeEltPA)( rcec_pool_allocator, rcec );
4265cabdbb5cab3740c7082e44b770a582c8186888e9philippe}
4266cabdbb5cab3740c7082e44b770a582c8186888e9philippe//////////// END RCEC pool allocator
4267cabdbb5cab3740c7082e44b770a582c8186888e9philippe
/* Bucket array of the RCEC hash table.  ctxt__find_or_add indexes it
   with frames_hash % N_RCEC_TAB; each bucket is a singly linked chain
   threaded through RCEC.next. */
4268f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardjstatic RCEC** contextTab = NULL; /* hash table of RCEC*s */
4269f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
4270cabdbb5cab3740c7082e44b770a582c8186888e9philippe/* Count of allocated RCEC having ref count > 0 */
/* Updated by ctxt__rcinc (on a 0 -> 1 transition) and ctxt__rcdec
   (on a 1 -> 0 transition). */
4271cabdbb5cab3740c7082e44b770a582c8186888e9philippestatic UWord RCEC_referenced = 0;
4272f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
4273f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj/* Gives an arbitrary total order on RCEC .frames fields */
4274f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardjstatic Word RCEC__cmp_by_frames ( RCEC* ec1, RCEC* ec2 ) {
4275f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   Word i;
4276f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   tl_assert(ec1 && ec1->magic == RCEC_MAGIC);
4277f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   tl_assert(ec2 && ec2->magic == RCEC_MAGIC);
42786c83d5e38fa00ce5c1532b274b4e61834ec1db48njn   if (ec1->frames_hash < ec2->frames_hash) return -1;
42796c83d5e38fa00ce5c1532b274b4e61834ec1db48njn   if (ec1->frames_hash > ec2->frames_hash) return  1;
42806c83d5e38fa00ce5c1532b274b4e61834ec1db48njn   for (i = 0; i < N_FRAMES; i++) {
4281f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      if (ec1->frames[i] < ec2->frames[i]) return -1;
42826c83d5e38fa00ce5c1532b274b4e61834ec1db48njn      if (ec1->frames[i] > ec2->frames[i]) return  1;
4283f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   }
4284f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   return 0;
4285f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj}
4286f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
4287f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
4288f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj/* Dec the ref of this RCEC. */
4289f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardjstatic void ctxt__rcdec ( RCEC* ec )
4290f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj{
4291f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   stats__ctxt_rcdec_calls++;
4292f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   tl_assert(ec && ec->magic == RCEC_MAGIC);
4293f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   tl_assert(ec->rc > 0);
4294f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   ec->rc--;
4295cabdbb5cab3740c7082e44b770a582c8186888e9philippe   if (ec->rc == 0)
4296cabdbb5cab3740c7082e44b770a582c8186888e9philippe      RCEC_referenced--;
4297f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj}
4298f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
4299f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardjstatic void ctxt__rcinc ( RCEC* ec )
4300f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj{
4301f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   tl_assert(ec && ec->magic == RCEC_MAGIC);
4302cabdbb5cab3740c7082e44b770a582c8186888e9philippe   if (ec->rc == 0)
4303cabdbb5cab3740c7082e44b770a582c8186888e9philippe      RCEC_referenced++;
4304f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   ec->rc++;
4305f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj}
4306f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
4307f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
4308f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj/* Find 'ec' in the RCEC list whose head pointer lives at 'headp' and
4309ad4e979f408239dabbaae955d8ffcb84a51a5c85florian   move it one step closer to the front of the list, so as to make
4310f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   subsequent searches for it cheaper. */
4311f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardjstatic void move_RCEC_one_step_forward ( RCEC** headp, RCEC* ec )
4312f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj{
4313f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   RCEC *ec0, *ec1, *ec2;
4314f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   if (ec == *headp)
4315f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      tl_assert(0); /* already at head of list */
4316f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   tl_assert(ec != NULL);
4317f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   ec0 = *headp;
4318f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   ec1 = NULL;
4319f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   ec2 = NULL;
4320f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   while (True) {
4321f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      if (ec0 == NULL || ec0 == ec) break;
4322f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      ec2 = ec1;
4323f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      ec1 = ec0;
4324f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      ec0 = ec0->next;
4325f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   }
4326f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   tl_assert(ec0 == ec);
4327f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   if (ec0 != NULL && ec1 != NULL && ec2 != NULL) {
4328f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      RCEC* tmp;
4329f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      /* ec0 points to ec, ec1 to its predecessor, and ec2 to ec1's
4330f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj         predecessor.  Swap ec0 and ec1, that is, move ec0 one step
4331f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj         closer to the start of the list. */
4332f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      tl_assert(ec2->next == ec1);
4333f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      tl_assert(ec1->next == ec0);
4334f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      tmp = ec0->next;
4335f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      ec2->next = ec0;
4336f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      ec0->next = ec1;
4337f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      ec1->next = tmp;
4338f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   }
4339f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   else
4340f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   if (ec0 != NULL && ec1 != NULL && ec2 == NULL) {
4341f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      /* it's second in the list. */
4342f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      tl_assert(*headp == ec1);
4343f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      tl_assert(ec1->next == ec0);
4344f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      ec1->next = ec0->next;
4345f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      ec0->next = ec1;
4346f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      *headp = ec0;
4347f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   }
4348f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj}
4349f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
4350f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
4351f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj/* Find the given RCEC in the tree, and return a pointer to it.  Or,
4352f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   if not present, add the given one to the tree (by making a copy of
4353f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   it, so the caller can immediately deallocate the original) and
4354f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   return a pointer to the copy.  The caller can safely have 'example'
4355f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   on its stack, since we will always return a pointer to a copy of
4356f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   it, not to the original.  Note that the inserted node will have .rc
4357ad4e979f408239dabbaae955d8ffcb84a51a5c85florian   of zero and so the caller must immediately increment it. */
4358f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj__attribute__((noinline))
4359f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardjstatic RCEC* ctxt__find_or_add ( RCEC* example )
4360f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj{
4361f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   UWord hent;
4362f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   RCEC* copy;
4363f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   tl_assert(example && example->magic == RCEC_MAGIC);
4364f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   tl_assert(example->rc == 0);
4365f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
4366f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   /* Search the hash table to see if we already have it. */
4367f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   stats__ctxt_tab_qs++;
43686c83d5e38fa00ce5c1532b274b4e61834ec1db48njn   hent = example->frames_hash % N_RCEC_TAB;
4369f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   copy = contextTab[hent];
4370f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   while (1) {
4371f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      if (!copy) break;
4372f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      tl_assert(copy->magic == RCEC_MAGIC);
4373f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      stats__ctxt_tab_cmps++;
4374f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      if (0 == RCEC__cmp_by_frames(copy, example)) break;
4375f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      copy = copy->next;
4376f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   }
4377f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
4378f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   if (copy) {
4379f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      tl_assert(copy != example);
4380f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      /* optimisation: if it's not at the head of its list, move 1
4381f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj         step fwds, to make future searches cheaper */
4382f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      if (copy != contextTab[hent]) {
4383f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj         move_RCEC_one_step_forward( &contextTab[hent], copy );
4384f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      }
4385f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   } else {
4386d86e3a28694ca5a7c59881475b8d1f6c9ac285b0sewardj      copy = alloc_RCEC();
4387f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      tl_assert(copy != example);
4388f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      *copy = *example;
4389f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      copy->next = contextTab[hent];
4390f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      contextTab[hent] = copy;
4391f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      stats__ctxt_tab_curr++;
4392f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      if (stats__ctxt_tab_curr > stats__ctxt_tab_max)
4393f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj         stats__ctxt_tab_max = stats__ctxt_tab_curr;
4394f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   }
4395f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   return copy;
4396f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj}
4397f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
4398f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardjstatic inline UWord ROLW ( UWord w, Int n )
4399f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj{
4400f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   Int bpw = 8 * sizeof(UWord);
4401f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   w = (w << n) | (w >> (bpw-n));
4402f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   return w;
4403f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj}
4404f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
4405f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj__attribute__((noinline))
4406f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardjstatic RCEC* get_RCEC ( Thr* thr )
4407f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj{
4408f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   UWord hash, i;
4409f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   RCEC  example;
4410f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   example.magic = RCEC_MAGIC;
4411f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   example.rc = 0;
4412f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   example.rcX = 0;
4413195623b2e895970a6fdd8c33ba031596682d81c1florian   example.next = NULL;
44146c83d5e38fa00ce5c1532b274b4e61834ec1db48njn   main_get_stacktrace( thr, &example.frames[0], N_FRAMES );
4415f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   hash = 0;
44166c83d5e38fa00ce5c1532b274b4e61834ec1db48njn   for (i = 0; i < N_FRAMES; i++) {
4417f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      hash ^= example.frames[i];
4418f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      hash = ROLW(hash, 19);
4419f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   }
44206c83d5e38fa00ce5c1532b274b4e61834ec1db48njn   example.frames_hash = hash;
4421f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   return ctxt__find_or_add( &example );
4422f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj}
4423f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
4424f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj///////////////////////////////////////////////////////
4425bc307e561793f859ead6bb1a921d6ddc9524855asewardj//// Part (2):
4426328d6627c26471332610da3f5a0b9cc3cdd410c7philippe///  A hashtable guest-addr -> OldRef, that refers to (1)
4427328d6627c26471332610da3f5a0b9cc3cdd410c7philippe///  Note: we use the guest address as key. This means that the entries
4428328d6627c26471332610da3f5a0b9cc3cdd410c7philippe///  for multiple threads accessing the same address will land in the same
4429328d6627c26471332610da3f5a0b9cc3cdd410c7philippe///  bucket. It might be nice to have a better distribution of the
4430328d6627c26471332610da3f5a0b9cc3cdd410c7philippe///  OldRef in the hashtable by using as key the guest address ^ tsw.
4431328d6627c26471332610da3f5a0b9cc3cdd410c7philippe///  The problem is that when a race is reported on a ga, we need to retrieve
4432328d6627c26471332610da3f5a0b9cc3cdd410c7philippe///  efficiently the accesses to ga by other threads, only using the ga.
4433328d6627c26471332610da3f5a0b9cc3cdd410c7philippe///  Measurements on firefox have shown that the chain length is reasonable.
4434f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
4435ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj/* Records an access: a thread, a context (size & writeness) and the
4436328d6627c26471332610da3f5a0b9cc3cdd410c7philippe   number of held locks. The size (1,2,4,8) is stored as is in szB.
4437328d6627c26471332610da3f5a0b9cc3cdd410c7philippe   Note that szB uses more bits than needed to store a size up to 8.
4438328d6627c26471332610da3f5a0b9cc3cdd410c7philippe   This allows to use a TSW as a fully initialised UInt e.g. in
4439328d6627c26471332610da3f5a0b9cc3cdd410c7philippe   cmp_oldref_tsw. If needed, a more compact representation of szB
4440328d6627c26471332610da3f5a0b9cc3cdd410c7philippe   can be done (e.g. use only 4 bits, or use only 2 bits and encode the
4441328d6627c26471332610da3f5a0b9cc3cdd410c7philippe   size (1,2,4,8) as 00 = 1, 01 = 2, 10 = 4, 11 = 8. */
/* Thread + Size + Writeness, packed into bitfields whose widths add
   up to 32 bits (SCALARTS_N_THRBITS + (32 - SCALARTS_N_THRBITS - 1)
   + 1).  oldref_tsw reads the whole struct back as a single UInt. */
4442328d6627c26471332610da3f5a0b9cc3cdd410c7philippetypedef
4443328d6627c26471332610da3f5a0b9cc3cdd410c7philippe   struct {
4444ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj      UInt      thrid  : SCALARTS_N_THRBITS; /* id of the accessing thread */
4445328d6627c26471332610da3f5a0b9cc3cdd410c7philippe      UInt      szB    : 32 - SCALARTS_N_THRBITS - 1; /* access size in bytes, stored as is */
4446ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj      UInt      isW    : 1; /* writeness flag; presumably 1 for a write -- confirm at the call sites */
4447328d6627c26471332610da3f5a0b9cc3cdd410c7philippe   } TSW; // Thread+Size+Writeness
/* One recorded access: who/how (tsw), a lock set, and the execution
   context (stack trace) at which it happened. */
4448328d6627c26471332610da3f5a0b9cc3cdd410c7philippetypedef
4449328d6627c26471332610da3f5a0b9cc3cdd410c7philippe   struct {
4450328d6627c26471332610da3f5a0b9cc3cdd410c7philippe      TSW       tsw; /* thread, size and writeness of the access */
4451328d6627c26471332610da3f5a0b9cc3cdd410c7philippe      WordSetID locksHeldW; /* presumably the locks held for writing at the access -- confirm */
4452328d6627c26471332610da3f5a0b9cc3cdd410c7philippe      RCEC*     rcec; /* reference into the RCEC hash table; released with ctxt__rcdec on reuse */
4453ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj   }
4454ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj   Thr_n_RCEC;
4455f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
/* One entry of the old-reference cache.  Each OldRef is a node of the
   oldrefHT hash table (keyed by 'ga') and, at the same time, a node of
   the doubly linked list bounded by the 'lru' and 'mru' sentinels. */
4456f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardjtypedef
4457cabdbb5cab3740c7082e44b770a582c8186888e9philippe   struct OldRef {
4458328d6627c26471332610da3f5a0b9cc3cdd410c7philippe      struct OldRef *ht_next; // to link hash table nodes together.
4459328d6627c26471332610da3f5a0b9cc3cdd410c7philippe      UWord  ga; // hash_table key, == address for which we record an access.
4460cabdbb5cab3740c7082e44b770a582c8186888e9philippe      struct OldRef *prev; // to refs older than this one
4461cabdbb5cab3740c7082e44b770a582c8186888e9philippe      struct OldRef *next; // to refs newer than this one
4462328d6627c26471332610da3f5a0b9cc3cdd410c7philippe      UWord stamp; // allows to order (by time of access) 2 OldRef
4463328d6627c26471332610da3f5a0b9cc3cdd410c7philippe      Thr_n_RCEC acc; // the recorded access (tsw, lock set, RCEC)
4464f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   }
4465f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   OldRef;
4466328d6627c26471332610da3f5a0b9cc3cdd410c7philippe
4467328d6627c26471332610da3f5a0b9cc3cdd410c7philippe/* Returns the or->tsw as an UInt */
4468328d6627c26471332610da3f5a0b9cc3cdd410c7philippestatic inline UInt oldref_tsw (const OldRef* or)
4469328d6627c26471332610da3f5a0b9cc3cdd410c7philippe{
4470328d6627c26471332610da3f5a0b9cc3cdd410c7philippe   return *(const UInt*)(&or->acc.tsw);
4471328d6627c26471332610da3f5a0b9cc3cdd410c7philippe}
4472328d6627c26471332610da3f5a0b9cc3cdd410c7philippe
4473328d6627c26471332610da3f5a0b9cc3cdd410c7philippe/* Compare the tsw component for 2 OldRef.
4474328d6627c26471332610da3f5a0b9cc3cdd410c7philippe   Used for OldRef hashtable (which already verifies equality of the
4475328d6627c26471332610da3f5a0b9cc3cdd410c7philippe   'key' part. */
4476328d6627c26471332610da3f5a0b9cc3cdd410c7philippestatic Word cmp_oldref_tsw (const void* node1, const void* node2 )
4477328d6627c26471332610da3f5a0b9cc3cdd410c7philippe{
4478328d6627c26471332610da3f5a0b9cc3cdd410c7philippe   const UInt tsw1 = oldref_tsw(node1);
4479328d6627c26471332610da3f5a0b9cc3cdd410c7philippe   const UInt tsw2 = oldref_tsw(node2);
4480328d6627c26471332610da3f5a0b9cc3cdd410c7philippe
4481328d6627c26471332610da3f5a0b9cc3cdd410c7philippe   if (tsw1 < tsw2) return -1;
4482328d6627c26471332610da3f5a0b9cc3cdd410c7philippe   if (tsw1 > tsw2) return  1;
4483328d6627c26471332610da3f5a0b9cc3cdd410c7philippe   return 0;
4484328d6627c26471332610da3f5a0b9cc3cdd410c7philippe}
4485328d6627c26471332610da3f5a0b9cc3cdd410c7philippe
4486d86e3a28694ca5a7c59881475b8d1f6c9ac285b0sewardj
44876643e96a72e8530a7c8830c02ffb2fb4aee74c88philippe//////////// BEGIN OldRef pool allocator
/* Pool allocator from which alloc_or_reuse_OldRef obtains new OldRef
   elements. */
44886643e96a72e8530a7c8830c02ffb2fb4aee74c88philippestatic PoolAlloc* oldref_pool_allocator;
4489cabdbb5cab3740c7082e44b770a582c8186888e9philippe// Note: We only allocate elements in this pool allocator, we never free them.
4490cabdbb5cab3740c7082e44b770a582c8186888e9philippe// We stop allocating elements at HG_(clo_conflict_cache_size).
4491cabdbb5cab3740c7082e44b770a582c8186888e9philippe//////////// END OldRef pool allocator
4492d86e3a28694ca5a7c59881475b8d1f6c9ac285b0sewardj
/* Sentinel nodes bounding the LRU list: real entries sit between
   'lru' (oldest end; lru.next is the least recently used entry) and
   'mru' (newest end; mru.prev is the most recently used entry). */
4493cabdbb5cab3740c7082e44b770a582c8186888e9philippestatic OldRef mru;
4494cabdbb5cab3740c7082e44b770a582c8186888e9philippestatic OldRef lru;
4495cabdbb5cab3740c7082e44b770a582c8186888e9philippe// A double linked list, chaining all OldRef in a mru/lru order.
4496cabdbb5cab3740c7082e44b770a582c8186888e9philippe// mru/lru are sentinel nodes.
4497cabdbb5cab3740c7082e44b770a582c8186888e9philippe// Whenever an oldref is re-used, its position is changed as the most recently
4498cabdbb5cab3740c7082e44b770a582c8186888e9philippe// used (i.e. pointed to by mru.prev).
4499cabdbb5cab3740c7082e44b770a582c8186888e9philippe// When a new oldref is needed, it is allocated from the pool
4500cabdbb5cab3740c7082e44b770a582c8186888e9philippe//  if we have not yet reached --conflict-cache-size.
4501cabdbb5cab3740c7082e44b770a582c8186888e9philippe// Otherwise, if all oldref have already been allocated,
4502cabdbb5cab3740c7082e44b770a582c8186888e9philippe// the least recently used (i.e. pointed to by lru.next) is re-used.
4503cabdbb5cab3740c7082e44b770a582c8186888e9philippe// When an OldRef is used, it is moved as the most recently used entry
4504cabdbb5cab3740c7082e44b770a582c8186888e9philippe// (i.e. pointed to by mru.prev).
4505cabdbb5cab3740c7082e44b770a582c8186888e9philippe
4506cabdbb5cab3740c7082e44b770a582c8186888e9philippe// Removes r from the double linked list
4507cabdbb5cab3740c7082e44b770a582c8186888e9philippe// Note: we do not need to test for special cases such as
4508cabdbb5cab3740c7082e44b770a582c8186888e9philippe// NULL next or prev pointers, because we have sentinel nodes
4509cabdbb5cab3740c7082e44b770a582c8186888e9philippe// at both sides of the list. So, a node is always forward and
4510cabdbb5cab3740c7082e44b770a582c8186888e9philippe// backward linked.
4511cabdbb5cab3740c7082e44b770a582c8186888e9philippestatic inline void OldRef_unchain(OldRef *r)
4512cabdbb5cab3740c7082e44b770a582c8186888e9philippe{
4513cabdbb5cab3740c7082e44b770a582c8186888e9philippe   r->next->prev = r->prev;
4514cabdbb5cab3740c7082e44b770a582c8186888e9philippe   r->prev->next = r->next;
4515d86e3a28694ca5a7c59881475b8d1f6c9ac285b0sewardj}
4516d86e3a28694ca5a7c59881475b8d1f6c9ac285b0sewardj
4517cabdbb5cab3740c7082e44b770a582c8186888e9philippe// Insert new as the newest OldRef
4518cabdbb5cab3740c7082e44b770a582c8186888e9philippe// Similarly to OldRef_unchain, no need to test for NULL
4519cabdbb5cab3740c7082e44b770a582c8186888e9philippe// pointers, as e.g. mru.prev is always guaranteed to point
4520cabdbb5cab3740c7082e44b770a582c8186888e9philippe// to a non NULL node (lru when the list is empty).
4521cabdbb5cab3740c7082e44b770a582c8186888e9philippestatic inline void OldRef_newest(OldRef *new)
4522cabdbb5cab3740c7082e44b770a582c8186888e9philippe{
4523cabdbb5cab3740c7082e44b770a582c8186888e9philippe   new->next = &mru;
4524cabdbb5cab3740c7082e44b770a582c8186888e9philippe   new->prev = mru.prev;
4525cabdbb5cab3740c7082e44b770a582c8186888e9philippe   mru.prev = new;
4526cabdbb5cab3740c7082e44b770a582c8186888e9philippe   new->prev->next = new;
4527d86e3a28694ca5a7c59881475b8d1f6c9ac285b0sewardj}
4528d86e3a28694ca5a7c59881475b8d1f6c9ac285b0sewardj
4529328d6627c26471332610da3f5a0b9cc3cdd410c7philippe
/* The OldRef hash table and the number of OldRefs handed out so far.
   alloc_or_reuse_OldRef increments oldrefHTN on each fresh allocation
   and compares it against HG_(clo_conflict_cache_size). */
4530328d6627c26471332610da3f5a0b9cc3cdd410c7philippestatic VgHashTable* oldrefHT    = NULL; /* Hash table* OldRef* */
4531328d6627c26471332610da3f5a0b9cc3cdd410c7philippestatic UWord     oldrefHTN    = 0;    /* # elems in oldrefHT */
4532328d6627c26471332610da3f5a0b9cc3cdd410c7philippe/* Note: the nr of ref in the oldrefHT will always be equal to
4533cabdbb5cab3740c7082e44b770a582c8186888e9philippe   the nr of elements that were allocated from the OldRef pool allocator
4534cabdbb5cab3740c7082e44b770a582c8186888e9philippe   as we never free an OldRef : we just re-use them. */
4535cabdbb5cab3740c7082e44b770a582c8186888e9philippe
4536cabdbb5cab3740c7082e44b770a582c8186888e9philippe
4537cabdbb5cab3740c7082e44b770a582c8186888e9philippe/* allocates a new OldRef or re-use the lru one if all allowed OldRef
4538cabdbb5cab3740c7082e44b770a582c8186888e9philippe   have already been allocated. */
4539cabdbb5cab3740c7082e44b770a582c8186888e9philippestatic OldRef* alloc_or_reuse_OldRef ( void )
4540cabdbb5cab3740c7082e44b770a582c8186888e9philippe{
4541328d6627c26471332610da3f5a0b9cc3cdd410c7philippe   if (oldrefHTN < HG_(clo_conflict_cache_size)) {
4542328d6627c26471332610da3f5a0b9cc3cdd410c7philippe      oldrefHTN++;
4543cabdbb5cab3740c7082e44b770a582c8186888e9philippe      return VG_(allocEltPA) ( oldref_pool_allocator );
4544cabdbb5cab3740c7082e44b770a582c8186888e9philippe   } else {
4545328d6627c26471332610da3f5a0b9cc3cdd410c7philippe      OldRef *oldref_ht;
4546cabdbb5cab3740c7082e44b770a582c8186888e9philippe      OldRef *oldref = lru.next;
4547cabdbb5cab3740c7082e44b770a582c8186888e9philippe
4548cabdbb5cab3740c7082e44b770a582c8186888e9philippe      OldRef_unchain(oldref);
4549328d6627c26471332610da3f5a0b9cc3cdd410c7philippe      oldref_ht = VG_(HT_gen_remove) (oldrefHT, oldref, cmp_oldref_tsw);
4550328d6627c26471332610da3f5a0b9cc3cdd410c7philippe      tl_assert (oldref == oldref_ht);
4551328d6627c26471332610da3f5a0b9cc3cdd410c7philippe      ctxt__rcdec( oldref->acc.rcec );
4552cabdbb5cab3740c7082e44b770a582c8186888e9philippe      return oldref;
4553cabdbb5cab3740c7082e44b770a582c8186888e9philippe   }
4554cabdbb5cab3740c7082e44b770a582c8186888e9philippe}
4555cabdbb5cab3740c7082e44b770a582c8186888e9philippe
4556f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
45571669cc73e29e6e8f1ef0e161a9904976bc8a0e36sewardjinline static UInt min_UInt ( UInt a, UInt b ) {
45581669cc73e29e6e8f1ef0e161a9904976bc8a0e36sewardj   return a < b ? a : b;
45591669cc73e29e6e8f1ef0e161a9904976bc8a0e36sewardj}
45601669cc73e29e6e8f1ef0e161a9904976bc8a0e36sewardj
4561a781be6728a28c94aec57f793f4d084456e93684sewardj/* Compare the intervals [a1,a1+n1) and [a2,a2+n2).  Return -1 if the
4562a781be6728a28c94aec57f793f4d084456e93684sewardj   first interval is lower, 1 if the first interval is higher, and 0
4563a781be6728a28c94aec57f793f4d084456e93684sewardj   if there is any overlap.  Redundant paranoia with casting is there
4564a781be6728a28c94aec57f793f4d084456e93684sewardj   following what looked distinctly like a bug in gcc-4.1.2, in which
4565a781be6728a28c94aec57f793f4d084456e93684sewardj   some of the comparisons were done signedly instead of
4566a781be6728a28c94aec57f793f4d084456e93684sewardj   unsignedly. */
4567a781be6728a28c94aec57f793f4d084456e93684sewardj/* Copied from exp-ptrcheck/sg_main.c */
4568328d6627c26471332610da3f5a0b9cc3cdd410c7philippestatic inline Word cmp_nonempty_intervals ( Addr a1, SizeT n1,
4569328d6627c26471332610da3f5a0b9cc3cdd410c7philippe                                            Addr a2, SizeT n2 ) {
4570a781be6728a28c94aec57f793f4d084456e93684sewardj   UWord a1w = (UWord)a1;
4571a781be6728a28c94aec57f793f4d084456e93684sewardj   UWord n1w = (UWord)n1;
4572a781be6728a28c94aec57f793f4d084456e93684sewardj   UWord a2w = (UWord)a2;
4573a781be6728a28c94aec57f793f4d084456e93684sewardj   UWord n2w = (UWord)n2;
4574a781be6728a28c94aec57f793f4d084456e93684sewardj   tl_assert(n1w > 0 && n2w > 0);
4575a781be6728a28c94aec57f793f4d084456e93684sewardj   if (a1w + n1w <= a2w) return -1L;
4576a781be6728a28c94aec57f793f4d084456e93684sewardj   if (a2w + n2w <= a1w) return 1L;
4577a781be6728a28c94aec57f793f4d084456e93684sewardj   return 0;
4578a781be6728a28c94aec57f793f4d084456e93684sewardj}
4579a781be6728a28c94aec57f793f4d084456e93684sewardj
4580328d6627c26471332610da3f5a0b9cc3cdd410c7philippestatic UWord event_map_stamp = 0; // Used to stamp each OldRef when touched.
4581328d6627c26471332610da3f5a0b9cc3cdd410c7philippe
4582c5ea9961f9705b956742ae8c553c76caa2da8c29sewardjstatic void event_map_bind ( Addr a, SizeT szB, Bool isW, Thr* thr )
4583f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj{
4584328d6627c26471332610da3f5a0b9cc3cdd410c7philippe   OldRef  example;
4585d86e3a28694ca5a7c59881475b8d1f6c9ac285b0sewardj   OldRef* ref;
4586c5ea9961f9705b956742ae8c553c76caa2da8c29sewardj   RCEC*   rcec;
4587f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
4588ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj   tl_assert(thr);
4589ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj   ThrID thrid = thr->thrid;
4590ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj   tl_assert(thrid != 0); /* zero is used to denote an empty slot. */
4591ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj
4592ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj   WordSetID locksHeldW = thr->hgthread->locksetW;
4593ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj
4594c5ea9961f9705b956742ae8c553c76caa2da8c29sewardj   rcec = get_RCEC( thr );
4595c5ea9961f9705b956742ae8c553c76caa2da8c29sewardj
4596328d6627c26471332610da3f5a0b9cc3cdd410c7philippe   tl_assert (szB == 4 || szB == 8 ||szB == 1 || szB == 2);
4597328d6627c26471332610da3f5a0b9cc3cdd410c7philippe   // Check for most frequent cases first
4598328d6627c26471332610da3f5a0b9cc3cdd410c7philippe   // Note: we could support a szB up to 1 << (32 - SCALARTS_N_THRBITS - 1)
4599f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
4600328d6627c26471332610da3f5a0b9cc3cdd410c7philippe   /* Look in the oldrefHT to see if we already have a record for this
4601328d6627c26471332610da3f5a0b9cc3cdd410c7philippe      address/thr/sz/isW. */
4602328d6627c26471332610da3f5a0b9cc3cdd410c7philippe   example.ga = a;
4603328d6627c26471332610da3f5a0b9cc3cdd410c7philippe   example.acc.tsw = (TSW) {.thrid = thrid,
4604328d6627c26471332610da3f5a0b9cc3cdd410c7philippe                            .szB = szB,
4605328d6627c26471332610da3f5a0b9cc3cdd410c7philippe                            .isW = (UInt)(isW & 1)};
4606328d6627c26471332610da3f5a0b9cc3cdd410c7philippe   ref = VG_(HT_gen_lookup) (oldrefHT, &example, cmp_oldref_tsw);
4607f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
4608328d6627c26471332610da3f5a0b9cc3cdd410c7philippe   if (ref) {
4609328d6627c26471332610da3f5a0b9cc3cdd410c7philippe      /* We already have a record for this address and this (thrid, R/W,
4610849b0ed71673805c5bdc3e44b1743a3d2c1b513dsewardj         size) triple. */
4611cabdbb5cab3740c7082e44b770a582c8186888e9philippe      tl_assert (ref->ga == a);
4612f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
4613328d6627c26471332610da3f5a0b9cc3cdd410c7philippe      /* thread 'thr' has an entry.  Update its RCEC, if it differs. */
4614328d6627c26471332610da3f5a0b9cc3cdd410c7philippe      if (rcec == ref->acc.rcec)
4615328d6627c26471332610da3f5a0b9cc3cdd410c7philippe         stats__ctxt_eq_tsw_eq_rcec++;
4616328d6627c26471332610da3f5a0b9cc3cdd410c7philippe      else {
4617328d6627c26471332610da3f5a0b9cc3cdd410c7philippe         stats__ctxt_eq_tsw_neq_rcec++;
4618328d6627c26471332610da3f5a0b9cc3cdd410c7philippe         ctxt__rcdec( ref->acc.rcec );
4619328d6627c26471332610da3f5a0b9cc3cdd410c7philippe         ctxt__rcinc(rcec);
4620328d6627c26471332610da3f5a0b9cc3cdd410c7philippe         ref->acc.rcec       = rcec;
4621f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      }
4622328d6627c26471332610da3f5a0b9cc3cdd410c7philippe      tl_assert(ref->acc.tsw.thrid == thrid);
4623328d6627c26471332610da3f5a0b9cc3cdd410c7philippe      /* Update the stamp, RCEC and the W-held lockset. */
4624328d6627c26471332610da3f5a0b9cc3cdd410c7philippe      ref->stamp = event_map_stamp;
4625328d6627c26471332610da3f5a0b9cc3cdd410c7philippe      ref->acc.locksHeldW = locksHeldW;
4626f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
4627cabdbb5cab3740c7082e44b770a582c8186888e9philippe      OldRef_unchain(ref);
4628cabdbb5cab3740c7082e44b770a582c8186888e9philippe      OldRef_newest(ref);
4629f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
4630f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   } else {
4631328d6627c26471332610da3f5a0b9cc3cdd410c7philippe      /* We don't have a record for this address+triple.  Create a new one. */
4632328d6627c26471332610da3f5a0b9cc3cdd410c7philippe      stats__ctxt_neq_tsw_neq_rcec++;
4633cabdbb5cab3740c7082e44b770a582c8186888e9philippe      ref = alloc_or_reuse_OldRef();
4634cabdbb5cab3740c7082e44b770a582c8186888e9philippe      ref->ga = a;
4635328d6627c26471332610da3f5a0b9cc3cdd410c7philippe      ref->acc.tsw = (TSW) {.thrid  = thrid,
4636328d6627c26471332610da3f5a0b9cc3cdd410c7philippe                            .szB    = szB,
4637328d6627c26471332610da3f5a0b9cc3cdd410c7philippe                            .isW    = (UInt)(isW & 1)};
4638328d6627c26471332610da3f5a0b9cc3cdd410c7philippe      ref->stamp = event_map_stamp;
4639328d6627c26471332610da3f5a0b9cc3cdd410c7philippe      ref->acc.locksHeldW = locksHeldW;
4640328d6627c26471332610da3f5a0b9cc3cdd410c7philippe      ref->acc.rcec       = rcec;
4641328d6627c26471332610da3f5a0b9cc3cdd410c7philippe      ctxt__rcinc(rcec);
4642328d6627c26471332610da3f5a0b9cc3cdd410c7philippe
4643328d6627c26471332610da3f5a0b9cc3cdd410c7philippe      VG_(HT_add_node) ( oldrefHT, ref );
4644cabdbb5cab3740c7082e44b770a582c8186888e9philippe      OldRef_newest (ref);
4645f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   }
4646328d6627c26471332610da3f5a0b9cc3cdd410c7philippe   event_map_stamp++;
4647f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj}
4648f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
4649f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
4650328d6627c26471332610da3f5a0b9cc3cdd410c7philippe/* Extract info from the conflicting-access machinery.
4651328d6627c26471332610da3f5a0b9cc3cdd410c7philippe   Returns the most recent conflicting access with thr/[a, a+szB[/isW. */
4652c5ea9961f9705b956742ae8c553c76caa2da8c29sewardjBool libhb_event_map_lookup ( /*OUT*/ExeContext** resEC,
4653ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj                              /*OUT*/Thr**        resThr,
4654ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj                              /*OUT*/SizeT*       resSzB,
4655ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj                              /*OUT*/Bool*        resIsW,
4656ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj                              /*OUT*/WordSetID*   locksHeldW,
4657c5ea9961f9705b956742ae8c553c76caa2da8c29sewardj                              Thr* thr, Addr a, SizeT szB, Bool isW )
4658f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj{
4659a781be6728a28c94aec57f793f4d084456e93684sewardj   Word    i, j;
4660328d6627c26471332610da3f5a0b9cc3cdd410c7philippe   OldRef *ref = NULL;
4661328d6627c26471332610da3f5a0b9cc3cdd410c7philippe   SizeT  ref_szB = 0;
4662f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
4663328d6627c26471332610da3f5a0b9cc3cdd410c7philippe   OldRef *cand_ref;
4664328d6627c26471332610da3f5a0b9cc3cdd410c7philippe   SizeT  cand_ref_szB;
4665328d6627c26471332610da3f5a0b9cc3cdd410c7philippe   Addr   cand_a;
4666a781be6728a28c94aec57f793f4d084456e93684sewardj
4667a781be6728a28c94aec57f793f4d084456e93684sewardj   Addr toCheck[15];
4668a781be6728a28c94aec57f793f4d084456e93684sewardj   Int  nToCheck = 0;
4669c5ea9961f9705b956742ae8c553c76caa2da8c29sewardj
4670c5ea9961f9705b956742ae8c553c76caa2da8c29sewardj   tl_assert(thr);
4671c5ea9961f9705b956742ae8c553c76caa2da8c29sewardj   tl_assert(szB == 8 || szB == 4 || szB == 2 || szB == 1);
4672f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
4673ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj   ThrID thrid = thr->thrid;
4674ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj
4675a781be6728a28c94aec57f793f4d084456e93684sewardj   toCheck[nToCheck++] = a;
4676a781be6728a28c94aec57f793f4d084456e93684sewardj   for (i = -7; i < (Word)szB; i++) {
4677a781be6728a28c94aec57f793f4d084456e93684sewardj      if (i != 0)
4678a781be6728a28c94aec57f793f4d084456e93684sewardj         toCheck[nToCheck++] = a + i;
4679a781be6728a28c94aec57f793f4d084456e93684sewardj   }
4680a781be6728a28c94aec57f793f4d084456e93684sewardj   tl_assert(nToCheck <= 15);
4681a781be6728a28c94aec57f793f4d084456e93684sewardj
4682a781be6728a28c94aec57f793f4d084456e93684sewardj   /* Now see if we can find a suitable matching event for
4683a781be6728a28c94aec57f793f4d084456e93684sewardj      any of the addresses in toCheck[0 .. nToCheck-1]. */
4684a781be6728a28c94aec57f793f4d084456e93684sewardj   for (j = 0; j < nToCheck; j++) {
4685a781be6728a28c94aec57f793f4d084456e93684sewardj
4686a781be6728a28c94aec57f793f4d084456e93684sewardj      cand_a = toCheck[j];
4687a781be6728a28c94aec57f793f4d084456e93684sewardj      //      VG_(printf)("test %ld %p\n", j, cand_a);
4688a781be6728a28c94aec57f793f4d084456e93684sewardj
4689328d6627c26471332610da3f5a0b9cc3cdd410c7philippe      /* Find the first HT element for this address.
4690328d6627c26471332610da3f5a0b9cc3cdd410c7philippe         We might have several of these. They will be linked via ht_next.
4691328d6627c26471332610da3f5a0b9cc3cdd410c7philippe         We however need to check various elements as the list contains
4692328d6627c26471332610da3f5a0b9cc3cdd410c7philippe         all elements that map to the same bucket. */
4693328d6627c26471332610da3f5a0b9cc3cdd410c7philippe      for (cand_ref = VG_(HT_lookup)( oldrefHT, cand_a );
4694328d6627c26471332610da3f5a0b9cc3cdd410c7philippe           cand_ref; cand_ref = cand_ref->ht_next) {
4695328d6627c26471332610da3f5a0b9cc3cdd410c7philippe         if (cand_ref->ga != cand_a)
4696328d6627c26471332610da3f5a0b9cc3cdd410c7philippe            /* OldRef for another address in this HT bucket. Ignore. */
4697c5ea9961f9705b956742ae8c553c76caa2da8c29sewardj            continue;
4698c5ea9961f9705b956742ae8c553c76caa2da8c29sewardj
4699328d6627c26471332610da3f5a0b9cc3cdd410c7philippe         if (cand_ref->acc.tsw.thrid == thrid)
4700c5ea9961f9705b956742ae8c553c76caa2da8c29sewardj            /* This is an access by the same thread, but we're only
4701c5ea9961f9705b956742ae8c553c76caa2da8c29sewardj               interested in accesses from other threads.  Ignore. */
4702c5ea9961f9705b956742ae8c553c76caa2da8c29sewardj            continue;
4703c5ea9961f9705b956742ae8c553c76caa2da8c29sewardj
4704328d6627c26471332610da3f5a0b9cc3cdd410c7philippe         if ((!cand_ref->acc.tsw.isW) && (!isW))
4705c5ea9961f9705b956742ae8c553c76caa2da8c29sewardj            /* We don't want to report a read racing against another
4706c5ea9961f9705b956742ae8c553c76caa2da8c29sewardj               read; that's stupid.  So in this case move on. */
4707c5ea9961f9705b956742ae8c553c76caa2da8c29sewardj            continue;
4708c5ea9961f9705b956742ae8c553c76caa2da8c29sewardj
4709328d6627c26471332610da3f5a0b9cc3cdd410c7philippe         cand_ref_szB        = cand_ref->acc.tsw.szB;
4710328d6627c26471332610da3f5a0b9cc3cdd410c7philippe         if (cmp_nonempty_intervals(a, szB, cand_a, cand_ref_szB) != 0)
4711a781be6728a28c94aec57f793f4d084456e93684sewardj            /* No overlap with the access we're asking about.  Ignore. */
4712a781be6728a28c94aec57f793f4d084456e93684sewardj            continue;
4713a781be6728a28c94aec57f793f4d084456e93684sewardj
4714328d6627c26471332610da3f5a0b9cc3cdd410c7philippe         /* We have a match. Keep this match if it is newer than
4715328d6627c26471332610da3f5a0b9cc3cdd410c7philippe            the previous match. Note that stamp are Unsigned Words, and
4716328d6627c26471332610da3f5a0b9cc3cdd410c7philippe            for long running applications, event_map_stamp might have cycled.
4717328d6627c26471332610da3f5a0b9cc3cdd410c7philippe            So, 'roll' each stamp using event_map_stamp to have the
4718328d6627c26471332610da3f5a0b9cc3cdd410c7philippe            stamps in the good order, in case event_map_stamp recycled. */
4719328d6627c26471332610da3f5a0b9cc3cdd410c7philippe         if (!ref
4720328d6627c26471332610da3f5a0b9cc3cdd410c7philippe             || (ref->stamp - event_map_stamp)
4721328d6627c26471332610da3f5a0b9cc3cdd410c7philippe                   < (cand_ref->stamp - event_map_stamp)) {
4722328d6627c26471332610da3f5a0b9cc3cdd410c7philippe            ref = cand_ref;
4723328d6627c26471332610da3f5a0b9cc3cdd410c7philippe            ref_szB = cand_ref_szB;
4724328d6627c26471332610da3f5a0b9cc3cdd410c7philippe         }
4725f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      }
4726c5ea9961f9705b956742ae8c553c76caa2da8c29sewardj
4727328d6627c26471332610da3f5a0b9cc3cdd410c7philippe      if (ref) {
4728a781be6728a28c94aec57f793f4d084456e93684sewardj         /* return with success */
4729328d6627c26471332610da3f5a0b9cc3cdd410c7philippe         Int n, maxNFrames;
4730328d6627c26471332610da3f5a0b9cc3cdd410c7philippe         RCEC*     ref_rcec = ref->acc.rcec;
4731328d6627c26471332610da3f5a0b9cc3cdd410c7philippe         tl_assert(ref->acc.tsw.thrid);
4732328d6627c26471332610da3f5a0b9cc3cdd410c7philippe         tl_assert(ref_rcec);
4733328d6627c26471332610da3f5a0b9cc3cdd410c7philippe         tl_assert(ref_rcec->magic == RCEC_MAGIC);
4734328d6627c26471332610da3f5a0b9cc3cdd410c7philippe         tl_assert(ref_szB >= 1);
47353a4b58f812bb0dd4cdb20b2869629845a683191dnjn         /* Count how many non-zero frames we have. */
47363a4b58f812bb0dd4cdb20b2869629845a683191dnjn         maxNFrames = min_UInt(N_FRAMES, VG_(clo_backtrace_size));
47373a4b58f812bb0dd4cdb20b2869629845a683191dnjn         for (n = 0; n < maxNFrames; n++) {
4738328d6627c26471332610da3f5a0b9cc3cdd410c7philippe            if (0 == ref_rcec->frames[n]) break;
47393a4b58f812bb0dd4cdb20b2869629845a683191dnjn         }
4740328d6627c26471332610da3f5a0b9cc3cdd410c7philippe         *resEC      = VG_(make_ExeContext_from_StackTrace)(ref_rcec->frames,
4741328d6627c26471332610da3f5a0b9cc3cdd410c7philippe                                                            n);
4742328d6627c26471332610da3f5a0b9cc3cdd410c7philippe         *resThr     = Thr__from_ThrID(ref->acc.tsw.thrid);
4743328d6627c26471332610da3f5a0b9cc3cdd410c7philippe         *resSzB     = ref_szB;
4744328d6627c26471332610da3f5a0b9cc3cdd410c7philippe         *resIsW     = ref->acc.tsw.isW;
4745328d6627c26471332610da3f5a0b9cc3cdd410c7philippe         *locksHeldW = ref->acc.locksHeldW;
4746a4b20c089072bdaf83938044ec077a6a89622481philippe         stats__evm__lookup_found++;
4747a781be6728a28c94aec57f793f4d084456e93684sewardj         return True;
4748a781be6728a28c94aec57f793f4d084456e93684sewardj      }
4749f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
4750a781be6728a28c94aec57f793f4d084456e93684sewardj      /* consider next address in toCheck[] */
4751a781be6728a28c94aec57f793f4d084456e93684sewardj   } /* for (j = 0; j < nToCheck; j++) */
4752f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
4753a781be6728a28c94aec57f793f4d084456e93684sewardj   /* really didn't find anything. */
4754a4b20c089072bdaf83938044ec077a6a89622481philippe   stats__evm__lookup_notfound++;
4755a781be6728a28c94aec57f793f4d084456e93684sewardj   return False;
4756f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj}
4757f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
4758328d6627c26471332610da3f5a0b9cc3cdd410c7philippe
4759328d6627c26471332610da3f5a0b9cc3cdd410c7philippevoid libhb_event_map_access_history ( Addr a, SizeT szB, Access_t fn )
4760328d6627c26471332610da3f5a0b9cc3cdd410c7philippe{
4761328d6627c26471332610da3f5a0b9cc3cdd410c7philippe   OldRef *ref = lru.next;
4762328d6627c26471332610da3f5a0b9cc3cdd410c7philippe   SizeT ref_szB;
4763328d6627c26471332610da3f5a0b9cc3cdd410c7philippe   Int n;
4764328d6627c26471332610da3f5a0b9cc3cdd410c7philippe
4765328d6627c26471332610da3f5a0b9cc3cdd410c7philippe   while (ref != &mru) {
4766328d6627c26471332610da3f5a0b9cc3cdd410c7philippe      ref_szB = ref->acc.tsw.szB;
4767328d6627c26471332610da3f5a0b9cc3cdd410c7philippe      if (cmp_nonempty_intervals(a, szB, ref->ga, ref_szB) == 0) {
4768328d6627c26471332610da3f5a0b9cc3cdd410c7philippe         RCEC* ref_rcec = ref->acc.rcec;
4769328d6627c26471332610da3f5a0b9cc3cdd410c7philippe         for (n = 0; n < N_FRAMES; n++) {
4770328d6627c26471332610da3f5a0b9cc3cdd410c7philippe            if (0 == ref_rcec->frames[n]) {
4771328d6627c26471332610da3f5a0b9cc3cdd410c7philippe               break;
4772328d6627c26471332610da3f5a0b9cc3cdd410c7philippe            }
4773328d6627c26471332610da3f5a0b9cc3cdd410c7philippe         }
4774328d6627c26471332610da3f5a0b9cc3cdd410c7philippe         (*fn)(ref_rcec->frames, n,
4775328d6627c26471332610da3f5a0b9cc3cdd410c7philippe               Thr__from_ThrID(ref->acc.tsw.thrid),
4776328d6627c26471332610da3f5a0b9cc3cdd410c7philippe               ref->ga,
4777328d6627c26471332610da3f5a0b9cc3cdd410c7philippe               ref_szB,
4778328d6627c26471332610da3f5a0b9cc3cdd410c7philippe               ref->acc.tsw.isW,
4779328d6627c26471332610da3f5a0b9cc3cdd410c7philippe               ref->acc.locksHeldW);
4780328d6627c26471332610da3f5a0b9cc3cdd410c7philippe      }
4781328d6627c26471332610da3f5a0b9cc3cdd410c7philippe      tl_assert (ref->next == &mru
4782328d6627c26471332610da3f5a0b9cc3cdd410c7philippe                 || ((ref->stamp - event_map_stamp)
4783328d6627c26471332610da3f5a0b9cc3cdd410c7philippe                        < ref->next->stamp - event_map_stamp));
4784328d6627c26471332610da3f5a0b9cc3cdd410c7philippe      ref = ref->next;
4785328d6627c26471332610da3f5a0b9cc3cdd410c7philippe   }
4786328d6627c26471332610da3f5a0b9cc3cdd410c7philippe}
4787328d6627c26471332610da3f5a0b9cc3cdd410c7philippe
4788f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardjstatic void event_map_init ( void )
4789f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj{
4790f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   Word i;
4791d86e3a28694ca5a7c59881475b8d1f6c9ac285b0sewardj
47926643e96a72e8530a7c8830c02ffb2fb4aee74c88philippe   /* Context (RCEC) pool allocator */
47936643e96a72e8530a7c8830c02ffb2fb4aee74c88philippe   rcec_pool_allocator = VG_(newPA) (
47946643e96a72e8530a7c8830c02ffb2fb4aee74c88philippe                             sizeof(RCEC),
47956643e96a72e8530a7c8830c02ffb2fb4aee74c88philippe                             1000 /* RCECs per pool */,
47966643e96a72e8530a7c8830c02ffb2fb4aee74c88philippe                             HG_(zalloc),
47976643e96a72e8530a7c8830c02ffb2fb4aee74c88philippe                             "libhb.event_map_init.1 (RCEC pools)",
47986643e96a72e8530a7c8830c02ffb2fb4aee74c88philippe                             HG_(free)
47996643e96a72e8530a7c8830c02ffb2fb4aee74c88philippe                          );
4800d86e3a28694ca5a7c59881475b8d1f6c9ac285b0sewardj
4801d86e3a28694ca5a7c59881475b8d1f6c9ac285b0sewardj   /* Context table */
4802f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   tl_assert(!contextTab);
4803d86e3a28694ca5a7c59881475b8d1f6c9ac285b0sewardj   contextTab = HG_(zalloc)( "libhb.event_map_init.2 (context table)",
4804f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj                             N_RCEC_TAB * sizeof(RCEC*) );
4805f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   for (i = 0; i < N_RCEC_TAB; i++)
4806f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      contextTab[i] = NULL;
4807f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
48086643e96a72e8530a7c8830c02ffb2fb4aee74c88philippe   /* Oldref pool allocator */
48096643e96a72e8530a7c8830c02ffb2fb4aee74c88philippe   oldref_pool_allocator = VG_(newPA)(
48106643e96a72e8530a7c8830c02ffb2fb4aee74c88philippe                               sizeof(OldRef),
48116643e96a72e8530a7c8830c02ffb2fb4aee74c88philippe                               1000 /* OldRefs per pool */,
48126643e96a72e8530a7c8830c02ffb2fb4aee74c88philippe                               HG_(zalloc),
48136643e96a72e8530a7c8830c02ffb2fb4aee74c88philippe                               "libhb.event_map_init.3 (OldRef pools)",
48146643e96a72e8530a7c8830c02ffb2fb4aee74c88philippe                               HG_(free)
48156643e96a72e8530a7c8830c02ffb2fb4aee74c88philippe                            );
4816d86e3a28694ca5a7c59881475b8d1f6c9ac285b0sewardj
4817328d6627c26471332610da3f5a0b9cc3cdd410c7philippe   /* Oldref hashtable */
4818328d6627c26471332610da3f5a0b9cc3cdd410c7philippe   tl_assert(!oldrefHT);
4819328d6627c26471332610da3f5a0b9cc3cdd410c7philippe   oldrefHT = VG_(HT_construct) ("libhb.event_map_init.4 (oldref hashtable)");
4820f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
4821328d6627c26471332610da3f5a0b9cc3cdd410c7philippe   oldrefHTN = 0;
4822cabdbb5cab3740c7082e44b770a582c8186888e9philippe   mru.prev = &lru;
4823cabdbb5cab3740c7082e44b770a582c8186888e9philippe   mru.next = NULL;
4824cabdbb5cab3740c7082e44b770a582c8186888e9philippe   lru.prev = NULL;
4825cabdbb5cab3740c7082e44b770a582c8186888e9philippe   lru.next = &mru;
4826328d6627c26471332610da3f5a0b9cc3cdd410c7philippe   mru.acc = (Thr_n_RCEC) {.tsw = {.thrid = 0,
4827328d6627c26471332610da3f5a0b9cc3cdd410c7philippe                                   .szB = 0,
4828328d6627c26471332610da3f5a0b9cc3cdd410c7philippe                                   .isW = 0},
4829328d6627c26471332610da3f5a0b9cc3cdd410c7philippe                           .locksHeldW = 0,
4830328d6627c26471332610da3f5a0b9cc3cdd410c7philippe                           .rcec = NULL};
4831328d6627c26471332610da3f5a0b9cc3cdd410c7philippe   lru.acc = mru.acc;
4832f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj}
4833f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
4834cabdbb5cab3740c7082e44b770a582c8186888e9philippestatic void event_map__check_reference_counts ( void )
4835f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj{
4836f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   RCEC*   rcec;
4837f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   OldRef* oldref;
4838f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   Word    i;
4839f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   UWord   nEnts = 0;
4840f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
4841f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   /* Set the 'check' reference counts to zero.  Also, optionally
4842f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      check that the real reference counts are non-zero.  We allow
4843f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      these to fall to zero before a GC, but the GC must get rid of
4844f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      all those that are zero, hence none should be zero after a
4845f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      GC. */
4846f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   for (i = 0; i < N_RCEC_TAB; i++) {
4847f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      for (rcec = contextTab[i]; rcec; rcec = rcec->next) {
4848f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj         nEnts++;
4849f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj         tl_assert(rcec);
4850f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj         tl_assert(rcec->magic == RCEC_MAGIC);
4851f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj         rcec->rcX = 0;
4852f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      }
4853f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   }
4854f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
4855f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   /* check that the stats are sane */
4856f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   tl_assert(nEnts == stats__ctxt_tab_curr);
4857f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   tl_assert(stats__ctxt_tab_curr <= stats__ctxt_tab_max);
4858f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
4859f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   /* visit all the referencing points, inc check ref counts */
4860328d6627c26471332610da3f5a0b9cc3cdd410c7philippe   VG_(HT_ResetIter)( oldrefHT );
4861328d6627c26471332610da3f5a0b9cc3cdd410c7philippe   oldref = VG_(HT_Next)( oldrefHT );
4862328d6627c26471332610da3f5a0b9cc3cdd410c7philippe   while (oldref) {
4863328d6627c26471332610da3f5a0b9cc3cdd410c7philippe      tl_assert (oldref->acc.tsw.thrid);
4864328d6627c26471332610da3f5a0b9cc3cdd410c7philippe      tl_assert (oldref->acc.rcec);
4865328d6627c26471332610da3f5a0b9cc3cdd410c7philippe      tl_assert (oldref->acc.rcec->magic == RCEC_MAGIC);
4866328d6627c26471332610da3f5a0b9cc3cdd410c7philippe      oldref->acc.rcec->rcX++;
4867328d6627c26471332610da3f5a0b9cc3cdd410c7philippe      oldref = VG_(HT_Next)( oldrefHT );
4868f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   }
4869f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
4870f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   /* compare check ref counts with actual */
4871f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   for (i = 0; i < N_RCEC_TAB; i++) {
4872f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      for (rcec = contextTab[i]; rcec; rcec = rcec->next) {
4873f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj         tl_assert(rcec->rc == rcec->rcX);
4874f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      }
4875f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   }
4876f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj}
4877f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
48788fd92d394106e491df4e771f1b7e7966a550dca9sewardj__attribute__((noinline))
4879cabdbb5cab3740c7082e44b770a582c8186888e9philippestatic void do_RCEC_GC ( void )
4880f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj{
4881cabdbb5cab3740c7082e44b770a582c8186888e9philippe   UInt i;
4882d86e3a28694ca5a7c59881475b8d1f6c9ac285b0sewardj
4883cabdbb5cab3740c7082e44b770a582c8186888e9philippe   if (VG_(clo_stats)) {
4884cabdbb5cab3740c7082e44b770a582c8186888e9philippe      static UInt ctr = 1;
4885cabdbb5cab3740c7082e44b770a582c8186888e9philippe      VG_(message)(Vg_DebugMsg,
4886cabdbb5cab3740c7082e44b770a582c8186888e9philippe                  "libhb: RCEC GC: #%u  %lu slots,"
4887cabdbb5cab3740c7082e44b770a582c8186888e9philippe                   " %lu cur ents(ref'd %lu),"
4888cabdbb5cab3740c7082e44b770a582c8186888e9philippe                   " %lu max ents\n",
4889cabdbb5cab3740c7082e44b770a582c8186888e9philippe                   ctr++,
4890cabdbb5cab3740c7082e44b770a582c8186888e9philippe                   (UWord)N_RCEC_TAB,
4891cabdbb5cab3740c7082e44b770a582c8186888e9philippe                   stats__ctxt_tab_curr, RCEC_referenced,
4892cabdbb5cab3740c7082e44b770a582c8186888e9philippe                   stats__ctxt_tab_max );
4893f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   }
4894cabdbb5cab3740c7082e44b770a582c8186888e9philippe   tl_assert (stats__ctxt_tab_curr > RCEC_referenced);
4895f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
4896f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   /* Throw away all RCECs with zero reference counts */
4897f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   for (i = 0; i < N_RCEC_TAB; i++) {
4898f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      RCEC** pp = &contextTab[i];
4899f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      RCEC*  p  = *pp;
4900f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      while (p) {
4901f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj         if (p->rc == 0) {
4902f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj            *pp = p->next;
4903d86e3a28694ca5a7c59881475b8d1f6c9ac285b0sewardj            free_RCEC(p);
4904f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj            p = *pp;
4905f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj            tl_assert(stats__ctxt_tab_curr > 0);
4906328d6627c26471332610da3f5a0b9cc3cdd410c7philippe            stats__ctxt_rcec_gc_discards++;
4907f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj            stats__ctxt_tab_curr--;
4908f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj         } else {
4909f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj            pp = &p->next;
4910f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj            p = p->next;
4911f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj         }
4912f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      }
4913f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   }
4914f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
4915cabdbb5cab3740c7082e44b770a582c8186888e9philippe   tl_assert (stats__ctxt_tab_curr == RCEC_referenced);
4916f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj}
4917f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
4918f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj/////////////////////////////////////////////////////////
4919f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj//                                                     //
4920f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj// Core MSM                                            //
4921f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj//                                                     //
4922f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj/////////////////////////////////////////////////////////
4923f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
492423f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj/* Logic in msmcread/msmcwrite updated/verified after re-analysis, 19
492523f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj   Nov 08, and again after [...],
492623f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj   June 09. */
4927b0e009d5c2607eb66d99d75683187f45db6e30dbsewardj
/* Counters for the core MSM.  stats__msmcread and stats__msmcwrite
   are bumped on every call to msmcread and msmcwrite respectively.
   The _change counters are bumped only when the call yields a shadow
   value different from the old one, both the old and new values
   satisfy SVal__isC, and HG_(clo_history_level) >= 2 -- that is, in
   the same place where event_map_bind is called. */
492823f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardjstatic ULong stats__msmcread         = 0;
492923f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardjstatic ULong stats__msmcread_change  = 0;
493023f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardjstatic ULong stats__msmcwrite        = 0;
493123f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardjstatic ULong stats__msmcwrite_change = 0;
4932f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
49338ab2c1303922dd85467e00cb7b5594d73b7043c6sewardj/* Some notes on the H1 history mechanism:
49348ab2c1303922dd85467e00cb7b5594d73b7043c6sewardj
49358ab2c1303922dd85467e00cb7b5594d73b7043c6sewardj   Transition rules are:
49368ab2c1303922dd85467e00cb7b5594d73b7043c6sewardj
49378ab2c1303922dd85467e00cb7b5594d73b7043c6sewardj   read_{Kr,Kw}(Cr,Cw)  = (Cr,           Cw `join` Kw)
49388ab2c1303922dd85467e00cb7b5594d73b7043c6sewardj   write_{Kr,Kw}(Cr,Cw) = (Cr `join` Kw, Cw `join` Kw)
49398ab2c1303922dd85467e00cb7b5594d73b7043c6sewardj
49408ab2c1303922dd85467e00cb7b5594d73b7043c6sewardj   After any access by a thread T to a location L, L's constraint pair
49418ab2c1303922dd85467e00cb7b5594d73b7043c6sewardj   (Cr,Cw) has Cw[T] == T's Kw[T], that is, == T's scalar W-clock.
49428ab2c1303922dd85467e00cb7b5594d73b7043c6sewardj
49438ab2c1303922dd85467e00cb7b5594d73b7043c6sewardj   After a race by thread T conflicting with some previous access by
49448ab2c1303922dd85467e00cb7b5594d73b7043c6sewardj   some other thread U, for a location with constraint (before
49458ab2c1303922dd85467e00cb7b5594d73b7043c6sewardj   processing the later access) (Cr,Cw), then Cw[U] is the segment in
49468ab2c1303922dd85467e00cb7b5594d73b7043c6sewardj   which the previous access lies.
49478ab2c1303922dd85467e00cb7b5594d73b7043c6sewardj
49488ab2c1303922dd85467e00cb7b5594d73b7043c6sewardj   Hence in record_race_info, we pass in Cfailed and Kfailed, which
49498ab2c1303922dd85467e00cb7b5594d73b7043c6sewardj   are compared so as to find out which thread(s) this access
49508ab2c1303922dd85467e00cb7b5594d73b7043c6sewardj   conflicts with.  Once that is established, we also require the
49518ab2c1303922dd85467e00cb7b5594d73b7043c6sewardj   pre-update Cw for the location, so we can index into it for those
49528ab2c1303922dd85467e00cb7b5594d73b7043c6sewardj   threads, to get the scalar clock values for the point at which the
49538ab2c1303922dd85467e00cb7b5594d73b7043c6sewardj   former accesses were made.  (In fact we only bother to do any of
49548ab2c1303922dd85467e00cb7b5594d73b7043c6sewardj   this for an arbitrarily chosen one of the conflicting threads, as
49558ab2c1303922dd85467e00cb7b5594d73b7043c6sewardj   that's simpler, it avoids flooding the user with vast amounts of
49568ab2c1303922dd85467e00cb7b5594d73b7043c6sewardj   mostly useless information, and because the program is wrong if it
49578ab2c1303922dd85467e00cb7b5594d73b7043c6sewardj   contains any races at all -- so we don't really need to show all
49588ab2c1303922dd85467e00cb7b5594d73b7043c6sewardj   conflicting access pairs initially, so long as we only show none if
49598ab2c1303922dd85467e00cb7b5594d73b7043c6sewardj   none exist).
49608ab2c1303922dd85467e00cb7b5594d73b7043c6sewardj
49618ab2c1303922dd85467e00cb7b5594d73b7043c6sewardj   ---
49628ab2c1303922dd85467e00cb7b5594d73b7043c6sewardj
49638ab2c1303922dd85467e00cb7b5594d73b7043c6sewardj   That requires the auxiliary proof that
49648ab2c1303922dd85467e00cb7b5594d73b7043c6sewardj
49658ab2c1303922dd85467e00cb7b5594d73b7043c6sewardj      (Cr `join` Kw)[T] == Kw[T]
49668ab2c1303922dd85467e00cb7b5594d73b7043c6sewardj
49678ab2c1303922dd85467e00cb7b5594d73b7043c6sewardj   Why should that be true?  Because for any thread T, Kw[T] >= the
49688ab2c1303922dd85467e00cb7b5594d73b7043c6sewardj   scalar clock value for T known by any other thread.  In other
49698ab2c1303922dd85467e00cb7b5594d73b7043c6sewardj   words, because T's value for its own scalar clock is at least as up
49708ab2c1303922dd85467e00cb7b5594d73b7043c6sewardj   to date as the value for it known by any other thread (that is true
49718ab2c1303922dd85467e00cb7b5594d73b7043c6sewardj   for both the R- and W- scalar clocks).  Hence no other thread will
49728ab2c1303922dd85467e00cb7b5594d73b7043c6sewardj   be able to feed in a value for that element (indirectly via a
49738ab2c1303922dd85467e00cb7b5594d73b7043c6sewardj   constraint) which will exceed Kw[T], and hence the join cannot
49748ab2c1303922dd85467e00cb7b5594d73b7043c6sewardj   cause that particular element to advance.
49758ab2c1303922dd85467e00cb7b5594d73b7043c6sewardj*/
49768ab2c1303922dd85467e00cb7b5594d73b7043c6sewardj
4977f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj__attribute__((noinline))
4978f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardjstatic void record_race_info ( Thr* acc_thr,
497923f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj                               Addr acc_addr, SizeT szB, Bool isWrite,
49808ab2c1303922dd85467e00cb7b5594d73b7043c6sewardj                               VtsID Cfailed,
49818ab2c1303922dd85467e00cb7b5594d73b7043c6sewardj                               VtsID Kfailed,
49828ab2c1303922dd85467e00cb7b5594d73b7043c6sewardj                               VtsID Cw )
4983f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj{
4984c5ea9961f9705b956742ae8c553c76caa2da8c29sewardj   /* Call here to report a race.  We just hand it onwards to
4985c5ea9961f9705b956742ae8c553c76caa2da8c29sewardj      HG_(record_error_Race).  If that in turn discovers that the
498623f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj      error is going to be collected, then, at history_level 2, that
498723f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj      queries the conflicting-event map.  The alternative would be to
498823f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj      query it right here.  But that causes a lot of pointless queries
498923f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj      for errors which will shortly be discarded as duplicates, and
499023f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj      can become a performance overhead; so we defer the query until
499123f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj      we know the error is not a duplicate. */
499223f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj
499323f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj   /* Stacks for the bounds of the (or one of the) conflicting
499423f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj      segment(s).  These are only set at history_level 1. */
499523f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj   ExeContext* hist1_seg_start = NULL;
499623f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj   ExeContext* hist1_seg_end   = NULL;
499723f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj   Thread*     hist1_conf_thr  = NULL;
499823f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj
499923f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj   tl_assert(acc_thr);
500060626649e5fa6cd21af377fde5e83803fc136f61sewardj   tl_assert(acc_thr->hgthread);
500160626649e5fa6cd21af377fde5e83803fc136f61sewardj   tl_assert(acc_thr->hgthread->hbthr == acc_thr);
500223f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj   tl_assert(HG_(clo_history_level) >= 0 && HG_(clo_history_level) <= 2);
500323f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj
500423f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj   if (HG_(clo_history_level) == 1) {
500523f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj      Bool found;
500623f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj      Word firstIx, lastIx;
500723f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj      ULong_n_EC key;
500823f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj
500923f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj      /* At history_level 1, we must round up the relevant stack-pair
501023f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj         for the conflicting segment right now.  This is because
50118ab2c1303922dd85467e00cb7b5594d73b7043c6sewardj         deferring it is complex; we can't (easily) put Kfailed and
50128ab2c1303922dd85467e00cb7b5594d73b7043c6sewardj         Cfailed into the XError and wait for later without
501323f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj         getting tied up in difficulties with VtsID reference
501423f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj         counting.  So just do it now. */
501523f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj      Thr*  confThr;
501623f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj      ULong confTym = 0;
501723f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj      /* Which thread are we in conflict with?  There may be more than
501823f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj         one, in which case VtsID__findFirst_notLEQ selects one arbitrarily
501923f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj         (in fact it's the one with the lowest Thr* value). */
50208ab2c1303922dd85467e00cb7b5594d73b7043c6sewardj      confThr = VtsID__findFirst_notLEQ( Cfailed, Kfailed );
502123f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj      /* This must exist!  since if it was NULL then there's no
50228ab2c1303922dd85467e00cb7b5594d73b7043c6sewardj         conflict (semantics of return value of
50238ab2c1303922dd85467e00cb7b5594d73b7043c6sewardj         VtsID__findFirst_notLEQ), and msmc{read,write}, which has
50248ab2c1303922dd85467e00cb7b5594d73b7043c6sewardj         called us, just checked exactly this -- that there was in
50258ab2c1303922dd85467e00cb7b5594d73b7043c6sewardj         fact a race. */
502623f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj      tl_assert(confThr);
502723f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj
502823f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj      /* Get the scalar clock value that the conflicting thread
502923f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj         introduced into the constraint.  A careful examination of the
503023f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj         base machine rules shows that this must be the same as the
503123f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj         conflicting thread's scalar clock when it created this
503223f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj         constraint.  Hence we know the scalar clock of the
503323f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj         conflicting thread when the conflicting access was made. */
50348ab2c1303922dd85467e00cb7b5594d73b7043c6sewardj      confTym = VtsID__indexAt( Cfailed, confThr );
503523f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj
503623f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj      /* Using this scalar clock, index into the conflicting thread's
503723f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj         collection of stack traces made each time its vector clock
503823f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj         (hence its scalar clock) changed.  This gives the stack
503923f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj         traces at the start and end of the conflicting segment (well,
504023f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj         as per comment just above, of one of the conflicting
504123f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj         segments, if there are more than one). */
504223f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj      key.ull = confTym;
504323f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj      key.ec  = NULL;
504423f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj      /* tl_assert(confThr); -- asserted just above */
50458ab2c1303922dd85467e00cb7b5594d73b7043c6sewardj      tl_assert(confThr->local_Kws_n_stacks);
504623f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj      firstIx = lastIx = 0;
504723f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj      found = VG_(lookupXA_UNSAFE)(
50488ab2c1303922dd85467e00cb7b5594d73b7043c6sewardj                 confThr->local_Kws_n_stacks,
504923f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj                 &key, &firstIx, &lastIx,
50506bd9dc18c043927c1196caba20a327238a179c42florian                 (XACmpFn_t)cmp__ULong_n_EC__by_ULong
505123f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj              );
50528ab2c1303922dd85467e00cb7b5594d73b7043c6sewardj      if (0) VG_(printf)("record_race_info %u %u %u  confThr %p "
50535e5cb009574352880f1bc530e1a73ddaae5003fcflorian                         "confTym %llu found %d (%ld,%ld)\n",
50548ab2c1303922dd85467e00cb7b5594d73b7043c6sewardj                         Cfailed, Kfailed, Cw,
505523f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj                         confThr, confTym, found, firstIx, lastIx);
505623f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj      /* We can't indefinitely collect stack traces at VTS
505723f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj         transitions, since we'd eventually run out of memory.  Hence
50588ab2c1303922dd85467e00cb7b5594d73b7043c6sewardj         note_local_Kw_n_stack_for will eventually throw away old
505923f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj         ones, which in turn means we might fail to find index value
506023f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj         confTym in the array. */
506123f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj      if (found) {
506223f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj         ULong_n_EC *pair_start, *pair_end;
506323f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj         pair_start
50648ab2c1303922dd85467e00cb7b5594d73b7043c6sewardj            = (ULong_n_EC*)VG_(indexXA)( confThr->local_Kws_n_stacks, lastIx );
506523f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj         hist1_seg_start = pair_start->ec;
50668ab2c1303922dd85467e00cb7b5594d73b7043c6sewardj         if (lastIx+1 < VG_(sizeXA)( confThr->local_Kws_n_stacks )) {
506723f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj            pair_end
50688ab2c1303922dd85467e00cb7b5594d73b7043c6sewardj               = (ULong_n_EC*)VG_(indexXA)( confThr->local_Kws_n_stacks,
506923f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj                                            lastIx+1 );
507023f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj            /* from properties of VG_(lookupXA) and the comparison fn used: */
507123f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj            tl_assert(pair_start->ull < pair_end->ull);
507223f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj            hist1_seg_end = pair_end->ec;
50738ab2c1303922dd85467e00cb7b5594d73b7043c6sewardj            /* Could do a bit better here.  It may be that pair_end
50748ab2c1303922dd85467e00cb7b5594d73b7043c6sewardj               doesn't have a stack, but the following entries in the
50758ab2c1303922dd85467e00cb7b5594d73b7043c6sewardj               array have the same scalar Kw and to have a stack.  So
50768ab2c1303922dd85467e00cb7b5594d73b7043c6sewardj               we should search a bit further along the array than
50778ab2c1303922dd85467e00cb7b5594d73b7043c6sewardj               lastIx+1 if hist1_seg_end is NULL. */
507823f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj         } else {
5079ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj            if (!confThr->llexit_done)
508023f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj               hist1_seg_end = main_get_EC( confThr );
508123f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj         }
508223f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj         // seg_start could be NULL iff this is the first stack in the thread
508323f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj         //if (seg_start) VG_(pp_ExeContext)(seg_start);
508423f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj         //if (seg_end)   VG_(pp_ExeContext)(seg_end);
508560626649e5fa6cd21af377fde5e83803fc136f61sewardj         hist1_conf_thr = confThr->hgthread;
508623f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj      }
508723f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj   }
508823f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj
508960626649e5fa6cd21af377fde5e83803fc136f61sewardj   HG_(record_error_Race)( acc_thr->hgthread, acc_addr,
509023f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj                           szB, isWrite,
509123f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj                           hist1_conf_thr, hist1_seg_start, hist1_seg_end );
5092f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj}
5093f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
5094f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardjstatic Bool is_sane_SVal_C ( SVal sv ) {
509523f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj   Bool leq;
5096f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   if (!SVal__isC(sv)) return True;
509723f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj   leq = VtsID__cmpLEQ( SVal__unC_Rmin(sv), SVal__unC_Wmin(sv) );
509823f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj   return leq;
5099f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj}
5100f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
5101f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
5102f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj/* Compute new state following a read */
510323f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardjstatic inline SVal msmcread ( SVal svOld,
5104f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj                              /* The following are only needed for
5105f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj                                 creating error reports. */
5106f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj                              Thr* acc_thr,
5107f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj                              Addr acc_addr, SizeT szB )
5108f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj{
5109f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   SVal svNew = SVal_INVALID;
511023f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj   stats__msmcread++;
5111f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
5112f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   /* Redundant sanity check on the constraints */
51138f5374ee1084af8cb46ec405a1de38c5aeb9194bsewardj   if (CHECK_MSM) {
5114f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      tl_assert(is_sane_SVal_C(svOld));
5115f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   }
5116f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
51171c0ce7a9edf8772773f6216dbad4bb04317d23b6sewardj   if (LIKELY(SVal__isC(svOld))) {
5118f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      VtsID tviR  = acc_thr->viR;
5119f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      VtsID tviW  = acc_thr->viW;
5120f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      VtsID rmini = SVal__unC_Rmin(svOld);
5121f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      VtsID wmini = SVal__unC_Wmin(svOld);
512223f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj      Bool  leq   = VtsID__cmpLEQ(rmini,tviR);
512323f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj      if (LIKELY(leq)) {
5124f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj         /* no race */
5125f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj         /* Note: RWLOCK subtlety: use tviW, not tviR */
5126f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj         svNew = SVal__mkC( rmini, VtsID__join2(wmini, tviW) );
5127f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj         goto out;
5128f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      } else {
5129b0e009d5c2607eb66d99d75683187f45db6e30dbsewardj         /* assert on sanity of constraints. */
513023f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj         Bool leqxx = VtsID__cmpLEQ(rmini,wmini);
513123f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj         tl_assert(leqxx);
513223f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj         // same as in non-race case
513323f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj         svNew = SVal__mkC( rmini, VtsID__join2(wmini, tviW) );
513423f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj         record_race_info( acc_thr, acc_addr, szB, False/*!isWrite*/,
51358ab2c1303922dd85467e00cb7b5594d73b7043c6sewardj                           rmini, /* Cfailed */
51368ab2c1303922dd85467e00cb7b5594d73b7043c6sewardj                           tviR,  /* Kfailed */
51378ab2c1303922dd85467e00cb7b5594d73b7043c6sewardj                           wmini  /* Cw */ );
5138f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj         goto out;
5139f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      }
5140f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   }
5141f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   if (SVal__isA(svOld)) {
5142f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      /* reading no-access memory (sigh); leave unchanged */
5143f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      /* check for no pollution */
5144f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      tl_assert(svOld == SVal_NOACCESS);
5145f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      svNew = SVal_NOACCESS;
5146f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      goto out;
5147f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   }
514823f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj   if (0) VG_(printf)("msmcread: bad svOld: 0x%016llx\n", svOld);
5149f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   tl_assert(0);
5150f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
5151f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj  out:
51528f5374ee1084af8cb46ec405a1de38c5aeb9194bsewardj   if (CHECK_MSM) {
5153f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      tl_assert(is_sane_SVal_C(svNew));
5154f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   }
51551c0ce7a9edf8772773f6216dbad4bb04317d23b6sewardj   if (UNLIKELY(svNew != svOld)) {
51561c0ce7a9edf8772773f6216dbad4bb04317d23b6sewardj      tl_assert(svNew != SVal_INVALID);
515723f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj      if (HG_(clo_history_level) >= 2
51581c0ce7a9edf8772773f6216dbad4bb04317d23b6sewardj          && SVal__isC(svOld) && SVal__isC(svNew)) {
5159c5ea9961f9705b956742ae8c553c76caa2da8c29sewardj         event_map_bind( acc_addr, szB, False/*!isWrite*/, acc_thr );
516023f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj         stats__msmcread_change++;
5161f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      }
5162f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   }
5163f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   return svNew;
5164f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj}
5165f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
5166f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
5167f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj/* Compute new state following a write */
516823f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardjstatic inline SVal msmcwrite ( SVal svOld,
5169f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj                              /* The following are only needed for
5170f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj                                 creating error reports. */
5171f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj                              Thr* acc_thr,
5172f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj                              Addr acc_addr, SizeT szB )
5173f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj{
5174f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   SVal svNew = SVal_INVALID;
517523f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj   stats__msmcwrite++;
5176f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
5177f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   /* Redundant sanity check on the constraints */
51788f5374ee1084af8cb46ec405a1de38c5aeb9194bsewardj   if (CHECK_MSM) {
5179f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      tl_assert(is_sane_SVal_C(svOld));
5180f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   }
5181f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
51821c0ce7a9edf8772773f6216dbad4bb04317d23b6sewardj   if (LIKELY(SVal__isC(svOld))) {
5183f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      VtsID tviW  = acc_thr->viW;
5184f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      VtsID wmini = SVal__unC_Wmin(svOld);
518523f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj      Bool  leq   = VtsID__cmpLEQ(wmini,tviW);
518623f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj      if (LIKELY(leq)) {
5187f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj         /* no race */
5188f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj         svNew = SVal__mkC( tviW, tviW );
5189f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj         goto out;
5190f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      } else {
5191f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj         VtsID rmini = SVal__unC_Rmin(svOld);
5192b0e009d5c2607eb66d99d75683187f45db6e30dbsewardj         /* assert on sanity of constraints. */
519323f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj         Bool leqxx = VtsID__cmpLEQ(rmini,wmini);
519423f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj         tl_assert(leqxx);
519523f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj         // same as in non-race case
519623f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj         // proof: in the non-race case, we have
519723f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj         //    rmini <= wmini (invar on constraints)
519823f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj         //    tviW <= tviR (invar on thread clocks)
519923f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj         //    wmini <= tviW (from run-time check)
520023f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj         // hence from transitivity of <= we have
520123f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj         //    rmini <= wmini <= tviW
520223f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj         // and so join(rmini,tviW) == tviW
520323f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj         // and    join(wmini,tviW) == tviW
520423f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj         // qed.
520523f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj         svNew = SVal__mkC( VtsID__join2(rmini, tviW),
520623f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj                            VtsID__join2(wmini, tviW) );
520723f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj         record_race_info( acc_thr, acc_addr, szB, True/*isWrite*/,
52088ab2c1303922dd85467e00cb7b5594d73b7043c6sewardj                           wmini, /* Cfailed */
52098ab2c1303922dd85467e00cb7b5594d73b7043c6sewardj                           tviW,  /* Kfailed */
52108ab2c1303922dd85467e00cb7b5594d73b7043c6sewardj                           wmini  /* Cw */ );
5211f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj         goto out;
5212f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      }
5213f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   }
5214f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   if (SVal__isA(svOld)) {
5215f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      /* writing no-access memory (sigh); leave unchanged */
5216f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      /* check for no pollution */
5217f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      tl_assert(svOld == SVal_NOACCESS);
5218f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      svNew = SVal_NOACCESS;
5219f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      goto out;
5220f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   }
522123f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj   if (0) VG_(printf)("msmcwrite: bad svOld: 0x%016llx\n", svOld);
5222f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   tl_assert(0);
5223f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
5224f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj  out:
52258f5374ee1084af8cb46ec405a1de38c5aeb9194bsewardj   if (CHECK_MSM) {
5226f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      tl_assert(is_sane_SVal_C(svNew));
5227f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   }
52281c0ce7a9edf8772773f6216dbad4bb04317d23b6sewardj   if (UNLIKELY(svNew != svOld)) {
52291c0ce7a9edf8772773f6216dbad4bb04317d23b6sewardj      tl_assert(svNew != SVal_INVALID);
523023f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj      if (HG_(clo_history_level) >= 2
52311c0ce7a9edf8772773f6216dbad4bb04317d23b6sewardj          && SVal__isC(svOld) && SVal__isC(svNew)) {
5232c5ea9961f9705b956742ae8c553c76caa2da8c29sewardj         event_map_bind( acc_addr, szB, True/*isWrite*/, acc_thr );
523323f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj         stats__msmcwrite_change++;
5234f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      }
5235f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   }
5236f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   return svNew;
5237f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj}
5238f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
5239f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
5240f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj/////////////////////////////////////////////////////////
5241f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj//                                                     //
5242f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj// Apply core MSM to specific memory locations         //
5243f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj//                                                     //
5244f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj/////////////////////////////////////////////////////////
5245f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
524623f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj/*------------- ZSM accesses: 8 bit sapply ------------- */
5247f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
524823f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardjstatic void zsm_sapply08__msmcread ( Thr* thr, Addr a ) {
5249f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   CacheLine* cl;
5250f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   UWord      cloff, tno, toff;
5251f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   SVal       svOld, svNew;
5252f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   UShort     descr;
525323f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj   stats__cline_cread08s++;
5254f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   cl    = get_cacheline(a);
5255f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   cloff = get_cacheline_offset(a);
5256f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   tno   = get_treeno(a);
5257f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   toff  = get_tree_offset(a); /* == 0 .. 7 */
5258f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   descr = cl->descrs[tno];
5259f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   if (UNLIKELY( !(descr & (TREE_DESCR_8_0 << toff)) )) {
5260f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      SVal* tree = &cl->svals[tno << 3];
5261f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      cl->descrs[tno] = pulldown_to_8(tree, toff, descr);
52628f5374ee1084af8cb46ec405a1de38c5aeb9194bsewardj      if (CHECK_ZSM)
5263f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj         tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
5264f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   }
5265f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   svOld = cl->svals[cloff];
526623f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj   svNew = msmcread( svOld, thr,a,1 );
52671c0ce7a9edf8772773f6216dbad4bb04317d23b6sewardj   if (CHECK_ZSM)
52681c0ce7a9edf8772773f6216dbad4bb04317d23b6sewardj      tl_assert(svNew != SVal_INVALID);
5269f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   cl->svals[cloff] = svNew;
5270f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj}
5271f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
527223f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardjstatic void zsm_sapply08__msmcwrite ( Thr* thr, Addr a ) {
5273f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   CacheLine* cl;
5274f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   UWord      cloff, tno, toff;
5275f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   SVal       svOld, svNew;
5276f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   UShort     descr;
527723f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj   stats__cline_cwrite08s++;
5278f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   cl    = get_cacheline(a);
5279f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   cloff = get_cacheline_offset(a);
5280f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   tno   = get_treeno(a);
5281f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   toff  = get_tree_offset(a); /* == 0 .. 7 */
5282f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   descr = cl->descrs[tno];
5283f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   if (UNLIKELY( !(descr & (TREE_DESCR_8_0 << toff)) )) {
5284f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      SVal* tree = &cl->svals[tno << 3];
5285f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      cl->descrs[tno] = pulldown_to_8(tree, toff, descr);
52868f5374ee1084af8cb46ec405a1de38c5aeb9194bsewardj      if (CHECK_ZSM)
5287f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj         tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
5288f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   }
5289f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   svOld = cl->svals[cloff];
529023f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj   svNew = msmcwrite( svOld, thr,a,1 );
52911c0ce7a9edf8772773f6216dbad4bb04317d23b6sewardj   if (CHECK_ZSM)
52921c0ce7a9edf8772773f6216dbad4bb04317d23b6sewardj      tl_assert(svNew != SVal_INVALID);
5293f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   cl->svals[cloff] = svNew;
5294f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj}
5295f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
529623f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj/*------------- ZSM accesses: 16 bit sapply ------------- */
5297f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
529823f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardjstatic void zsm_sapply16__msmcread ( Thr* thr, Addr a ) {
5299f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   CacheLine* cl;
5300f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   UWord      cloff, tno, toff;
5301f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   SVal       svOld, svNew;
5302f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   UShort     descr;
530323f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj   stats__cline_cread16s++;
5304f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   if (UNLIKELY(!aligned16(a))) goto slowcase;
5305f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   cl    = get_cacheline(a);
5306f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   cloff = get_cacheline_offset(a);
5307f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   tno   = get_treeno(a);
5308f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   toff  = get_tree_offset(a); /* == 0, 2, 4 or 6 */
5309f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   descr = cl->descrs[tno];
5310f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   if (UNLIKELY( !(descr & (TREE_DESCR_16_0 << toff)) )) {
5311f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      if (valid_value_is_below_me_16(descr, toff)) {
5312f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj         goto slowcase;
5313f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      } else {
5314f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj         SVal* tree = &cl->svals[tno << 3];
5315f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj         cl->descrs[tno] = pulldown_to_16(tree, toff, descr);
5316f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      }
53178f5374ee1084af8cb46ec405a1de38c5aeb9194bsewardj      if (CHECK_ZSM)
5318f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj         tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
5319f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   }
5320f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   svOld = cl->svals[cloff];
532123f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj   svNew = msmcread( svOld, thr,a,2 );
53221c0ce7a9edf8772773f6216dbad4bb04317d23b6sewardj   if (CHECK_ZSM)
53231c0ce7a9edf8772773f6216dbad4bb04317d23b6sewardj      tl_assert(svNew != SVal_INVALID);
5324f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   cl->svals[cloff] = svNew;
5325f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   return;
5326f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj  slowcase: /* misaligned, or must go further down the tree */
5327f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   stats__cline_16to8splits++;
532823f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj   zsm_sapply08__msmcread( thr, a + 0 );
532923f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj   zsm_sapply08__msmcread( thr, a + 1 );
5330f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj}
5331f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
533223f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardjstatic void zsm_sapply16__msmcwrite ( Thr* thr, Addr a ) {
5333f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   CacheLine* cl;
5334f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   UWord      cloff, tno, toff;
5335f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   SVal       svOld, svNew;
5336f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   UShort     descr;
533723f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj   stats__cline_cwrite16s++;
5338f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   if (UNLIKELY(!aligned16(a))) goto slowcase;
5339f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   cl    = get_cacheline(a);
5340f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   cloff = get_cacheline_offset(a);
5341f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   tno   = get_treeno(a);
5342f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   toff  = get_tree_offset(a); /* == 0, 2, 4 or 6 */
5343f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   descr = cl->descrs[tno];
5344f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   if (UNLIKELY( !(descr & (TREE_DESCR_16_0 << toff)) )) {
5345f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      if (valid_value_is_below_me_16(descr, toff)) {
5346f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj         goto slowcase;
5347f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      } else {
5348f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj         SVal* tree = &cl->svals[tno << 3];
5349f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj         cl->descrs[tno] = pulldown_to_16(tree, toff, descr);
5350f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      }
53518f5374ee1084af8cb46ec405a1de38c5aeb9194bsewardj      if (CHECK_ZSM)
5352f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj         tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
5353f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   }
5354f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   svOld = cl->svals[cloff];
535523f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj   svNew = msmcwrite( svOld, thr,a,2 );
53561c0ce7a9edf8772773f6216dbad4bb04317d23b6sewardj   if (CHECK_ZSM)
53571c0ce7a9edf8772773f6216dbad4bb04317d23b6sewardj      tl_assert(svNew != SVal_INVALID);
5358f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   cl->svals[cloff] = svNew;
5359f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   return;
5360f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj  slowcase: /* misaligned, or must go further down the tree */
5361f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   stats__cline_16to8splits++;
536223f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj   zsm_sapply08__msmcwrite( thr, a + 0 );
536323f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj   zsm_sapply08__msmcwrite( thr, a + 1 );
5364f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj}
5365f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
536623f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj/*------------- ZSM accesses: 32 bit sapply ------------- */
5367f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
536823f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardjstatic void zsm_sapply32__msmcread ( Thr* thr, Addr a ) {
5369f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   CacheLine* cl;
5370f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   UWord      cloff, tno, toff;
5371f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   SVal       svOld, svNew;
5372f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   UShort     descr;
537323f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj   stats__cline_cread32s++;
5374f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   if (UNLIKELY(!aligned32(a))) goto slowcase;
5375f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   cl    = get_cacheline(a);
5376f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   cloff = get_cacheline_offset(a);
5377f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   tno   = get_treeno(a);
5378f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   toff  = get_tree_offset(a); /* == 0 or 4 */
5379f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   descr = cl->descrs[tno];
5380f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   if (UNLIKELY( !(descr & (TREE_DESCR_32_0 << toff)) )) {
5381f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      if (valid_value_is_above_me_32(descr, toff)) {
5382f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj         SVal* tree = &cl->svals[tno << 3];
5383f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj         cl->descrs[tno] = pulldown_to_32(tree, toff, descr);
5384f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      } else {
5385f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj         goto slowcase;
5386f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      }
53878f5374ee1084af8cb46ec405a1de38c5aeb9194bsewardj      if (CHECK_ZSM)
5388f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj         tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
5389f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   }
5390f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   svOld = cl->svals[cloff];
539123f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj   svNew = msmcread( svOld, thr,a,4 );
53921c0ce7a9edf8772773f6216dbad4bb04317d23b6sewardj   if (CHECK_ZSM)
53931c0ce7a9edf8772773f6216dbad4bb04317d23b6sewardj      tl_assert(svNew != SVal_INVALID);
5394f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   cl->svals[cloff] = svNew;
5395f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   return;
5396f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj  slowcase: /* misaligned, or must go further down the tree */
5397f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   stats__cline_32to16splits++;
539823f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj   zsm_sapply16__msmcread( thr, a + 0 );
539923f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj   zsm_sapply16__msmcread( thr, a + 2 );
5400f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj}
5401f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
540223f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardjstatic void zsm_sapply32__msmcwrite ( Thr* thr, Addr a ) {
5403f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   CacheLine* cl;
5404f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   UWord      cloff, tno, toff;
5405f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   SVal       svOld, svNew;
5406f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   UShort     descr;
540723f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj   stats__cline_cwrite32s++;
5408f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   if (UNLIKELY(!aligned32(a))) goto slowcase;
5409f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   cl    = get_cacheline(a);
5410f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   cloff = get_cacheline_offset(a);
5411f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   tno   = get_treeno(a);
5412f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   toff  = get_tree_offset(a); /* == 0 or 4 */
5413f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   descr = cl->descrs[tno];
5414f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   if (UNLIKELY( !(descr & (TREE_DESCR_32_0 << toff)) )) {
5415f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      if (valid_value_is_above_me_32(descr, toff)) {
5416f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj         SVal* tree = &cl->svals[tno << 3];
5417f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj         cl->descrs[tno] = pulldown_to_32(tree, toff, descr);
5418f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      } else {
5419f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj         goto slowcase;
5420f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      }
54218f5374ee1084af8cb46ec405a1de38c5aeb9194bsewardj      if (CHECK_ZSM)
5422f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj         tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
5423f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   }
5424f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   svOld = cl->svals[cloff];
542523f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj   svNew = msmcwrite( svOld, thr,a,4 );
54261c0ce7a9edf8772773f6216dbad4bb04317d23b6sewardj   if (CHECK_ZSM)
54271c0ce7a9edf8772773f6216dbad4bb04317d23b6sewardj      tl_assert(svNew != SVal_INVALID);
5428f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   cl->svals[cloff] = svNew;
5429f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   return;
5430f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj  slowcase: /* misaligned, or must go further down the tree */
5431f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   stats__cline_32to16splits++;
543223f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj   zsm_sapply16__msmcwrite( thr, a + 0 );
543323f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj   zsm_sapply16__msmcwrite( thr, a + 2 );
5434f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj}
5435f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
543623f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj/*------------- ZSM accesses: 64 bit sapply ------------- */
5437f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
543823f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardjstatic void zsm_sapply64__msmcread ( Thr* thr, Addr a ) {
5439f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   CacheLine* cl;
54404c245e595b9f6300d3120408ca873f7115d9cc7dnjn   UWord      cloff, tno;
54414c245e595b9f6300d3120408ca873f7115d9cc7dnjn   //UWord      toff;
5442f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   SVal       svOld, svNew;
5443f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   UShort     descr;
544423f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj   stats__cline_cread64s++;
5445f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   if (UNLIKELY(!aligned64(a))) goto slowcase;
5446f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   cl    = get_cacheline(a);
5447f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   cloff = get_cacheline_offset(a);
5448f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   tno   = get_treeno(a);
54494c245e595b9f6300d3120408ca873f7115d9cc7dnjn   //toff  = get_tree_offset(a); /* == 0, unused */
5450f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   descr = cl->descrs[tno];
5451f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   if (UNLIKELY( !(descr & TREE_DESCR_64) )) {
5452f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      goto slowcase;
5453f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   }
5454f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   svOld = cl->svals[cloff];
545523f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj   svNew = msmcread( svOld, thr,a,8 );
54561c0ce7a9edf8772773f6216dbad4bb04317d23b6sewardj   if (CHECK_ZSM)
54571c0ce7a9edf8772773f6216dbad4bb04317d23b6sewardj      tl_assert(svNew != SVal_INVALID);
5458f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   cl->svals[cloff] = svNew;
5459f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   return;
5460f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj  slowcase: /* misaligned, or must go further down the tree */
5461f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   stats__cline_64to32splits++;
546223f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj   zsm_sapply32__msmcread( thr, a + 0 );
546323f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj   zsm_sapply32__msmcread( thr, a + 4 );
5464f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj}
5465f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
546623f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardjstatic void zsm_sapply64__msmcwrite ( Thr* thr, Addr a ) {
5467f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   CacheLine* cl;
54684c245e595b9f6300d3120408ca873f7115d9cc7dnjn   UWord      cloff, tno;
54694c245e595b9f6300d3120408ca873f7115d9cc7dnjn   //UWord      toff;
5470f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   SVal       svOld, svNew;
5471f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   UShort     descr;
547223f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj   stats__cline_cwrite64s++;
5473f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   if (UNLIKELY(!aligned64(a))) goto slowcase;
5474f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   cl    = get_cacheline(a);
5475f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   cloff = get_cacheline_offset(a);
5476f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   tno   = get_treeno(a);
54774c245e595b9f6300d3120408ca873f7115d9cc7dnjn   //toff  = get_tree_offset(a); /* == 0, unused */
5478f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   descr = cl->descrs[tno];
5479f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   if (UNLIKELY( !(descr & TREE_DESCR_64) )) {
5480f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      goto slowcase;
5481f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   }
5482f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   svOld = cl->svals[cloff];
548323f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj   svNew = msmcwrite( svOld, thr,a,8 );
54841c0ce7a9edf8772773f6216dbad4bb04317d23b6sewardj   if (CHECK_ZSM)
54851c0ce7a9edf8772773f6216dbad4bb04317d23b6sewardj      tl_assert(svNew != SVal_INVALID);
5486f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   cl->svals[cloff] = svNew;
5487f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   return;
5488f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj  slowcase: /* misaligned, or must go further down the tree */
5489f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   stats__cline_64to32splits++;
549023f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj   zsm_sapply32__msmcwrite( thr, a + 0 );
549123f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj   zsm_sapply32__msmcwrite( thr, a + 4 );
5492f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj}
5493f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
549423f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj/*--------------- ZSM accesses: 8 bit swrite --------------- */
5495f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
5496f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardjstatic
549723f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardjvoid zsm_swrite08 ( Addr a, SVal svNew ) {
5498f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   CacheLine* cl;
5499f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   UWord      cloff, tno, toff;
5500f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   UShort     descr;
550123f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj   stats__cline_swrite08s++;
5502f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   cl    = get_cacheline(a);
5503f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   cloff = get_cacheline_offset(a);
5504f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   tno   = get_treeno(a);
5505f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   toff  = get_tree_offset(a); /* == 0 .. 7 */
5506f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   descr = cl->descrs[tno];
5507f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   if (UNLIKELY( !(descr & (TREE_DESCR_8_0 << toff)) )) {
5508f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      SVal* tree = &cl->svals[tno << 3];
5509f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      cl->descrs[tno] = pulldown_to_8(tree, toff, descr);
55108f5374ee1084af8cb46ec405a1de38c5aeb9194bsewardj      if (CHECK_ZSM)
5511f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj         tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
5512f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   }
5513f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   tl_assert(svNew != SVal_INVALID);
5514f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   cl->svals[cloff] = svNew;
5515f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj}
5516f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
551723f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj/*--------------- ZSM accesses: 16 bit swrite --------------- */
5518f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
5519f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardjstatic
552023f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardjvoid zsm_swrite16 ( Addr a, SVal svNew ) {
5521f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   CacheLine* cl;
5522f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   UWord      cloff, tno, toff;
5523f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   UShort     descr;
552423f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj   stats__cline_swrite16s++;
5525f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   if (UNLIKELY(!aligned16(a))) goto slowcase;
5526f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   cl    = get_cacheline(a);
5527f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   cloff = get_cacheline_offset(a);
5528f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   tno   = get_treeno(a);
5529f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   toff  = get_tree_offset(a); /* == 0, 2, 4 or 6 */
5530f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   descr = cl->descrs[tno];
5531f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   if (UNLIKELY( !(descr & (TREE_DESCR_16_0 << toff)) )) {
5532f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      if (valid_value_is_below_me_16(descr, toff)) {
5533f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj         /* Writing at this level.  Need to fix up 'descr'. */
5534f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj         cl->descrs[tno] = pullup_descr_to_16(descr, toff);
5535f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj         /* At this point, the tree does not match cl->descr[tno] any
5536f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj            more.  The assignments below will fix it up. */
5537f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      } else {
5538f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj         /* We can't indiscriminately write on the w16 node as in the
5539f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj            w64 case, as that might make the node inconsistent with
5540f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj            its parent.  So first, pull down to this level. */
5541f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj         SVal* tree = &cl->svals[tno << 3];
5542f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj         cl->descrs[tno] = pulldown_to_16(tree, toff, descr);
55438f5374ee1084af8cb46ec405a1de38c5aeb9194bsewardj      if (CHECK_ZSM)
5544f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj         tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
5545f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      }
5546f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   }
5547f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   tl_assert(svNew != SVal_INVALID);
5548f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   cl->svals[cloff + 0] = svNew;
5549f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   cl->svals[cloff + 1] = SVal_INVALID;
5550f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   return;
5551f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj  slowcase: /* misaligned */
5552f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   stats__cline_16to8splits++;
555323f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj   zsm_swrite08( a + 0, svNew );
555423f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj   zsm_swrite08( a + 1, svNew );
5555f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj}
5556f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
555723f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj/*--------------- ZSM accesses: 32 bit swrite --------------- */
5558f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
5559f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardjstatic
556023f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardjvoid zsm_swrite32 ( Addr a, SVal svNew ) {
5561f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   CacheLine* cl;
5562f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   UWord      cloff, tno, toff;
5563f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   UShort     descr;
556423f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj   stats__cline_swrite32s++;
5565f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   if (UNLIKELY(!aligned32(a))) goto slowcase;
5566f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   cl    = get_cacheline(a);
5567f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   cloff = get_cacheline_offset(a);
5568f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   tno   = get_treeno(a);
5569f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   toff  = get_tree_offset(a); /* == 0 or 4 */
5570f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   descr = cl->descrs[tno];
5571f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   if (UNLIKELY( !(descr & (TREE_DESCR_32_0 << toff)) )) {
5572f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      if (valid_value_is_above_me_32(descr, toff)) {
5573f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj         /* We can't indiscriminately write on the w32 node as in the
5574f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj            w64 case, as that might make the node inconsistent with
5575f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj            its parent.  So first, pull down to this level. */
5576f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj         SVal* tree = &cl->svals[tno << 3];
5577f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj         cl->descrs[tno] = pulldown_to_32(tree, toff, descr);
55788f5374ee1084af8cb46ec405a1de38c5aeb9194bsewardj         if (CHECK_ZSM)
5579f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj            tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
5580f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      } else {
5581f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj         /* Writing at this level.  Need to fix up 'descr'. */
5582f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj         cl->descrs[tno] = pullup_descr_to_32(descr, toff);
5583f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj         /* At this point, the tree does not match cl->descr[tno] any
5584f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj            more.  The assignments below will fix it up. */
5585f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      }
5586f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   }
5587f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   tl_assert(svNew != SVal_INVALID);
5588f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   cl->svals[cloff + 0] = svNew;
5589f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   cl->svals[cloff + 1] = SVal_INVALID;
5590f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   cl->svals[cloff + 2] = SVal_INVALID;
5591f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   cl->svals[cloff + 3] = SVal_INVALID;
5592f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   return;
5593f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj  slowcase: /* misaligned */
5594f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   stats__cline_32to16splits++;
559523f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj   zsm_swrite16( a + 0, svNew );
559623f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj   zsm_swrite16( a + 2, svNew );
5597f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj}
5598f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
559923f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj/*--------------- ZSM accesses: 64 bit swrite --------------- */
5600f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
5601f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardjstatic
560223f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardjvoid zsm_swrite64 ( Addr a, SVal svNew ) {
5603f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   CacheLine* cl;
56044c245e595b9f6300d3120408ca873f7115d9cc7dnjn   UWord      cloff, tno;
56054c245e595b9f6300d3120408ca873f7115d9cc7dnjn   //UWord    toff;
560623f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj   stats__cline_swrite64s++;
5607f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   if (UNLIKELY(!aligned64(a))) goto slowcase;
5608f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   cl    = get_cacheline(a);
5609f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   cloff = get_cacheline_offset(a);
5610f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   tno   = get_treeno(a);
56114c245e595b9f6300d3120408ca873f7115d9cc7dnjn   //toff  = get_tree_offset(a); /* == 0, unused */
5612f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   cl->descrs[tno] = TREE_DESCR_64;
5613f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   tl_assert(svNew != SVal_INVALID);
5614f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   cl->svals[cloff + 0] = svNew;
5615f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   cl->svals[cloff + 1] = SVal_INVALID;
5616f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   cl->svals[cloff + 2] = SVal_INVALID;
5617f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   cl->svals[cloff + 3] = SVal_INVALID;
5618f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   cl->svals[cloff + 4] = SVal_INVALID;
5619f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   cl->svals[cloff + 5] = SVal_INVALID;
5620f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   cl->svals[cloff + 6] = SVal_INVALID;
5621f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   cl->svals[cloff + 7] = SVal_INVALID;
5622f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   return;
5623f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj  slowcase: /* misaligned */
5624f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   stats__cline_64to32splits++;
562523f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj   zsm_swrite32( a + 0, svNew );
562623f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj   zsm_swrite32( a + 4, svNew );
5627f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj}
5628f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
562923f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj/*------------- ZSM accesses: 8 bit sread/scopy ------------- */
5630f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
5631f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardjstatic
563223f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardjSVal zsm_sread08 ( Addr a ) {
5633f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   CacheLine* cl;
5634f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   UWord      cloff, tno, toff;
5635f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   UShort     descr;
563623f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj   stats__cline_sread08s++;
5637f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   cl    = get_cacheline(a);
5638f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   cloff = get_cacheline_offset(a);
5639f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   tno   = get_treeno(a);
5640f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   toff  = get_tree_offset(a); /* == 0 .. 7 */
5641f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   descr = cl->descrs[tno];
5642f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   if (UNLIKELY( !(descr & (TREE_DESCR_8_0 << toff)) )) {
5643f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      SVal* tree = &cl->svals[tno << 3];
5644f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      cl->descrs[tno] = pulldown_to_8(tree, toff, descr);
5645f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   }
5646f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   return cl->svals[cloff];
5647f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj}
5648f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
564923f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardjstatic void zsm_scopy08 ( Addr src, Addr dst, Bool uu_normalise ) {
5650f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   SVal       sv;
565123f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj   stats__cline_scopy08s++;
565223f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj   sv = zsm_sread08( src );
565323f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj   zsm_swrite08( dst, sv );
5654f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj}
5655f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
5656f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
565723f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj/* Block-copy states (needed for implementing realloc()).  Note this
565823f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj   doesn't change the filtering arrangements.  The caller of
565923f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj   zsm_scopy_range needs to attend to that. */
566023f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj
566123f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardjstatic void zsm_scopy_range ( Addr src, Addr dst, SizeT len )
566223f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj{
566323f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj   SizeT i;
566423f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj   if (len == 0)
566523f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj      return;
566623f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj
566723f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj   /* assert for non-overlappingness */
566823f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj   tl_assert(src+len <= dst || dst+len <= src);
566923f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj
567023f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj   /* To be simple, just copy byte by byte.  But so as not to wreck
567123f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj      performance for later accesses to dst[0 .. len-1], normalise
567223f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj      destination lines as we finish with them, and also normalise the
567323f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj      line containing the first and last address. */
567423f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj   for (i = 0; i < len; i++) {
567523f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj      Bool normalise
567623f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj         = get_cacheline_offset( dst+i+1 ) == 0 /* last in line */
567723f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj           || i == 0       /* first in range */
567823f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj           || i == len-1;  /* last in range */
567923f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj      zsm_scopy08( src+i, dst+i, normalise );
568023f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj   }
568123f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj}
568223f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj
568323f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj
568423f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj/* For setting address ranges to a given value.  Has considerable
568523f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj   sophistication so as to avoid generating large numbers of pointless
568623f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj   cache loads/writebacks for large ranges. */
568723f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj
568823f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj/* Do small ranges in-cache, in the obvious way. */
568923f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardjstatic
569023f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardjvoid zsm_sset_range_SMALL ( Addr a, SizeT len, SVal svNew )
5691f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj{
5692f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   /* fast track a couple of common cases */
5693f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   if (len == 4 && aligned32(a)) {
569423f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj      zsm_swrite32( a, svNew );
5695f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      return;
5696f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   }
5697f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   if (len == 8 && aligned64(a)) {
569823f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj      zsm_swrite64( a, svNew );
5699f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      return;
5700f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   }
5701f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
5702f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   /* be completely general (but as efficient as possible) */
5703f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   if (len == 0) return;
5704f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
5705f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   if (!aligned16(a) && len >= 1) {
570623f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj      zsm_swrite08( a, svNew );
5707f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      a += 1;
5708f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      len -= 1;
5709f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      tl_assert(aligned16(a));
5710f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   }
5711f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   if (len == 0) return;
5712f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
5713f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   if (!aligned32(a) && len >= 2) {
571423f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj      zsm_swrite16( a, svNew );
5715f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      a += 2;
5716f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      len -= 2;
5717f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      tl_assert(aligned32(a));
5718f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   }
5719f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   if (len == 0) return;
5720f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
5721f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   if (!aligned64(a) && len >= 4) {
572223f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj      zsm_swrite32( a, svNew );
5723f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      a += 4;
5724f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      len -= 4;
5725f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      tl_assert(aligned64(a));
5726f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   }
5727f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   if (len == 0) return;
5728f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
5729f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   if (len >= 8) {
5730f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      tl_assert(aligned64(a));
5731f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      while (len >= 8) {
573223f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj         zsm_swrite64( a, svNew );
5733f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj         a += 8;
5734f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj         len -= 8;
5735f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      }
5736f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      tl_assert(aligned64(a));
5737f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   }
5738f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   if (len == 0) return;
5739f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
5740f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   if (len >= 4)
5741f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      tl_assert(aligned32(a));
5742f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   if (len >= 4) {
574323f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj      zsm_swrite32( a, svNew );
5744f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      a += 4;
5745f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      len -= 4;
5746f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   }
5747f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   if (len == 0) return;
5748f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
5749f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   if (len >= 2)
5750f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      tl_assert(aligned16(a));
5751f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   if (len >= 2) {
575223f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj      zsm_swrite16( a, svNew );
5753f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      a += 2;
5754f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      len -= 2;
5755f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   }
5756f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   if (len == 0) return;
5757f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
5758f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   if (len >= 1) {
575923f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj      zsm_swrite08( a, svNew );
57604c245e595b9f6300d3120408ca873f7115d9cc7dnjn      //a += 1;
5761f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      len -= 1;
5762f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   }
5763f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   tl_assert(len == 0);
5764f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj}
5765f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
5766f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
576723f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj/* If we're doing a small range, hand off to zsm_sset_range_SMALL.  But
576823f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj   for larger ranges, try to operate directly on the out-of-cache
576923f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj   representation, rather than dragging lines into the cache,
577023f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj   overwriting them, and forcing them out.  This turns out to be an
577123f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj   important performance optimisation.
577223f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj
577323f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj   Note that this doesn't change the filtering arrangements.  The
577423f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj   caller of zsm_sset_range needs to attend to that. */
577523f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj
577623f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardjstatic void zsm_sset_range ( Addr a, SizeT len, SVal svNew )
577723f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj{
577823f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj   tl_assert(svNew != SVal_INVALID);
577923f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj   stats__cache_make_New_arange += (ULong)len;
578023f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj
578123f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj   if (0 && len > 500)
57825e5cb009574352880f1bc530e1a73ddaae5003fcflorian      VG_(printf)("make New      ( %#lx, %lu )\n", a, len );
578323f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj
578423f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj   if (0) {
578523f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj      static UWord n_New_in_cache = 0;
578623f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj      static UWord n_New_not_in_cache = 0;
578723f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj      /* tag is 'a' with the in-line offset masked out,
578823f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj         eg a[31]..a[4] 0000 */
578923f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj      Addr       tag = a & ~(N_LINE_ARANGE - 1);
579023f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj      UWord      wix = (a >> N_LINE_BITS) & (N_WAY_NENT - 1);
579123f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj      if (LIKELY(tag == cache_shmem.tags0[wix])) {
579223f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj         n_New_in_cache++;
579323f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj      } else {
579423f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj         n_New_not_in_cache++;
579523f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj      }
579623f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj      if (0 == ((n_New_in_cache + n_New_not_in_cache) % 100000))
579723f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj         VG_(printf)("shadow_mem_make_New: IN %lu OUT %lu\n",
579823f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj                     n_New_in_cache, n_New_not_in_cache );
579923f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj   }
580023f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj
580123f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj   if (LIKELY(len < 2 * N_LINE_ARANGE)) {
580223f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj      zsm_sset_range_SMALL( a, len, svNew );
580323f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj   } else {
580423f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj      Addr  before_start  = a;
580523f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj      Addr  aligned_start = cacheline_ROUNDUP(a);
580623f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj      Addr  after_start   = cacheline_ROUNDDN(a + len);
580723f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj      UWord before_len    = aligned_start - before_start;
580823f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj      UWord aligned_len   = after_start - aligned_start;
580923f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj      UWord after_len     = a + len - after_start;
581023f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj      tl_assert(before_start <= aligned_start);
581123f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj      tl_assert(aligned_start <= after_start);
581223f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj      tl_assert(before_len < N_LINE_ARANGE);
581323f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj      tl_assert(after_len < N_LINE_ARANGE);
581423f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj      tl_assert(get_cacheline_offset(aligned_start) == 0);
581523f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj      if (get_cacheline_offset(a) == 0) {
581623f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj         tl_assert(before_len == 0);
581723f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj         tl_assert(a == aligned_start);
581823f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj      }
581923f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj      if (get_cacheline_offset(a+len) == 0) {
582023f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj         tl_assert(after_len == 0);
582123f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj         tl_assert(after_start == a+len);
582223f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj      }
582323f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj      if (before_len > 0) {
582423f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj         zsm_sset_range_SMALL( before_start, before_len, svNew );
582523f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj      }
582623f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj      if (after_len > 0) {
582723f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj         zsm_sset_range_SMALL( after_start, after_len, svNew );
582823f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj      }
582923f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj      stats__cache_make_New_inZrep += (ULong)aligned_len;
583023f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj
583123f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj      while (1) {
583223f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj         Addr tag;
583323f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj         UWord wix;
583423f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj         if (aligned_start >= after_start)
583523f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj            break;
583623f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj         tl_assert(get_cacheline_offset(aligned_start) == 0);
583723f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj         tag = aligned_start & ~(N_LINE_ARANGE - 1);
583823f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj         wix = (aligned_start >> N_LINE_BITS) & (N_WAY_NENT - 1);
583923f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj         if (tag == cache_shmem.tags0[wix]) {
584023f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj            UWord i;
584123f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj            for (i = 0; i < N_LINE_ARANGE / 8; i++)
584223f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj               zsm_swrite64( aligned_start + i * 8, svNew );
584323f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj         } else {
584423f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj            UWord i;
584523f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj            Word zix;
584623f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj            SecMap* sm;
584723f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj            LineZ* lineZ;
584823f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj            /* This line is not in the cache.  Do not force it in; instead
584923f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj               modify it in-place. */
585023f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj            /* find the Z line to write in and rcdec it or the
585123f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj               associated F line. */
585223f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj            find_Z_for_writing( &sm, &zix, tag );
585323f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj            tl_assert(sm);
585423f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj            tl_assert(zix >= 0 && zix < N_SECMAP_ZLINES);
585523f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj            lineZ = &sm->linesZ[zix];
585623f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj            lineZ->dict[0] = svNew;
585723f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj            lineZ->dict[1] = lineZ->dict[2] = lineZ->dict[3] = SVal_INVALID;
585823f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj            for (i = 0; i < N_LINE_ARANGE/4; i++)
585923f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj               lineZ->ix2s[i] = 0; /* all refer to dict[0] */
586023f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj            rcinc_LineZ(lineZ);
586123f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj         }
586223f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj         aligned_start += N_LINE_ARANGE;
586323f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj         aligned_len -= N_LINE_ARANGE;
586423f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj      }
586523f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj      tl_assert(aligned_start == after_start);
586623f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj      tl_assert(aligned_len == 0);
586723f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj   }
586823f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj}
586923f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj
587023f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj
587123f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj/////////////////////////////////////////////////////////
587223f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj//                                                     //
587323f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj// Front-filtering accesses                            //
587423f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj//                                                     //
587523f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj/////////////////////////////////////////////////////////
587623f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj
587723f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardjstatic UWord stats__f_ac = 0;
587823f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardjstatic UWord stats__f_sk = 0;
587923f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj
588023f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj#if 0
588123f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj#  define STATS__F_SHOW \
588223f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj     do { \
588323f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj        if (UNLIKELY(0 == (stats__f_ac & 0xFFFFFF))) \
588423f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj           VG_(printf)("filters: ac %lu sk %lu\n",   \
588523f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj           stats__f_ac, stats__f_sk); \
588623f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj     } while (0)
588723f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj#else
588823f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj#  define STATS__F_SHOW /* */
588923f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj#endif
589023f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj
589123f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardjvoid zsm_sapply08_f__msmcwrite ( Thr* thr, Addr a ) {
589223f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj   stats__f_ac++;
589323f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj   STATS__F_SHOW;
589423f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj   if (LIKELY(Filter__ok_to_skip_cwr08(thr->filter, a))) {
589523f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj      stats__f_sk++;
589623f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj      return;
589723f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj   }
589823f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj   zsm_sapply08__msmcwrite(thr, a);
589923f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj}
590023f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj
590123f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardjvoid zsm_sapply16_f__msmcwrite ( Thr* thr, Addr a ) {
590223f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj   stats__f_ac++;
590323f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj   STATS__F_SHOW;
590423f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj   if (LIKELY(Filter__ok_to_skip_cwr16(thr->filter, a))) {
590523f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj      stats__f_sk++;
590623f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj      return;
590723f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj   }
590823f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj   zsm_sapply16__msmcwrite(thr, a);
590923f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj}
591023f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj
591123f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardjvoid zsm_sapply32_f__msmcwrite ( Thr* thr, Addr a ) {
591223f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj   stats__f_ac++;
591323f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj   STATS__F_SHOW;
591423f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj   if (LIKELY(Filter__ok_to_skip_cwr32(thr->filter, a))) {
591523f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj      stats__f_sk++;
591623f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj      return;
591723f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj   }
591823f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj   zsm_sapply32__msmcwrite(thr, a);
591923f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj}
5920f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
592123f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardjvoid zsm_sapply64_f__msmcwrite ( Thr* thr, Addr a ) {
592223f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj   stats__f_ac++;
592323f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj   STATS__F_SHOW;
592423f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj   if (LIKELY(Filter__ok_to_skip_cwr64(thr->filter, a))) {
592523f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj      stats__f_sk++;
592623f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj      return;
592723f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj   }
592823f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj   zsm_sapply64__msmcwrite(thr, a);
592923f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj}
593023f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj
593123f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardjvoid zsm_sapplyNN_f__msmcwrite ( Thr* thr, Addr a, SizeT len )
5932f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj{
5933f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   /* fast track a couple of common cases */
5934f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   if (len == 4 && aligned32(a)) {
593523f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj      zsm_sapply32_f__msmcwrite( thr, a );
5936f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      return;
5937f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   }
5938f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   if (len == 8 && aligned64(a)) {
593923f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj      zsm_sapply64_f__msmcwrite( thr, a );
5940f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      return;
5941f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   }
5942f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
5943f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   /* be completely general (but as efficient as possible) */
5944f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   if (len == 0) return;
5945f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
5946f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   if (!aligned16(a) && len >= 1) {
594723f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj      zsm_sapply08_f__msmcwrite( thr, a );
5948f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      a += 1;
5949f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      len -= 1;
5950f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      tl_assert(aligned16(a));
5951f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   }
5952f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   if (len == 0) return;
5953f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
5954f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   if (!aligned32(a) && len >= 2) {
595523f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj      zsm_sapply16_f__msmcwrite( thr, a );
5956f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      a += 2;
5957f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      len -= 2;
5958f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      tl_assert(aligned32(a));
5959f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   }
5960f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   if (len == 0) return;
5961f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
5962f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   if (!aligned64(a) && len >= 4) {
596323f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj      zsm_sapply32_f__msmcwrite( thr, a );
5964f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      a += 4;
5965f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      len -= 4;
5966f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      tl_assert(aligned64(a));
5967f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   }
5968f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   if (len == 0) return;
5969f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
5970f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   if (len >= 8) {
5971f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      tl_assert(aligned64(a));
5972f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      while (len >= 8) {
597323f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj         zsm_sapply64_f__msmcwrite( thr, a );
5974f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj         a += 8;
5975f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj         len -= 8;
5976f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      }
5977f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      tl_assert(aligned64(a));
5978f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   }
5979f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   if (len == 0) return;
5980f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
5981f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   if (len >= 4)
5982f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      tl_assert(aligned32(a));
5983f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   if (len >= 4) {
598423f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj      zsm_sapply32_f__msmcwrite( thr, a );
5985f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      a += 4;
5986f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      len -= 4;
5987f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   }
5988f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   if (len == 0) return;
5989f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
5990f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   if (len >= 2)
5991f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      tl_assert(aligned16(a));
5992f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   if (len >= 2) {
599323f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj      zsm_sapply16_f__msmcwrite( thr, a );
5994f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      a += 2;
5995f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      len -= 2;
5996f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   }
5997f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   if (len == 0) return;
5998f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
5999f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   if (len >= 1) {
600023f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj      zsm_sapply08_f__msmcwrite( thr, a );
60014c245e595b9f6300d3120408ca873f7115d9cc7dnjn      //a += 1;
6002f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      len -= 1;
6003f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   }
6004f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   tl_assert(len == 0);
6005f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj}
6006f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
600723f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardjvoid zsm_sapply08_f__msmcread ( Thr* thr, Addr a ) {
600823f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj   stats__f_ac++;
600923f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj   STATS__F_SHOW;
601023f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj   if (LIKELY(Filter__ok_to_skip_crd08(thr->filter, a))) {
601123f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj      stats__f_sk++;
6012f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      return;
601323f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj   }
601423f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj   zsm_sapply08__msmcread(thr, a);
601523f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj}
6016f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
601723f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardjvoid zsm_sapply16_f__msmcread ( Thr* thr, Addr a ) {
601823f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj   stats__f_ac++;
601923f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj   STATS__F_SHOW;
602023f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj   if (LIKELY(Filter__ok_to_skip_crd16(thr->filter, a))) {
602123f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj      stats__f_sk++;
602223f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj      return;
6023f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   }
602423f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj   zsm_sapply16__msmcread(thr, a);
6025f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj}
6026f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
602723f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardjvoid zsm_sapply32_f__msmcread ( Thr* thr, Addr a ) {
602823f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj   stats__f_ac++;
602923f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj   STATS__F_SHOW;
603023f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj   if (LIKELY(Filter__ok_to_skip_crd32(thr->filter, a))) {
603123f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj      stats__f_sk++;
603223f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj      return;
603323f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj   }
603423f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj   zsm_sapply32__msmcread(thr, a);
603523f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj}
6036f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
603723f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardjvoid zsm_sapply64_f__msmcread ( Thr* thr, Addr a ) {
603823f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj   stats__f_ac++;
603923f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj   STATS__F_SHOW;
604023f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj   if (LIKELY(Filter__ok_to_skip_crd64(thr->filter, a))) {
604123f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj      stats__f_sk++;
604223f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj      return;
604323f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj   }
604423f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj   zsm_sapply64__msmcread(thr, a);
604523f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj}
6046f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
604723f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardjvoid zsm_sapplyNN_f__msmcread ( Thr* thr, Addr a, SizeT len )
6048f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj{
6049f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   /* fast track a couple of common cases */
6050f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   if (len == 4 && aligned32(a)) {
605123f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj      zsm_sapply32_f__msmcread( thr, a );
6052f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      return;
6053f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   }
6054f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   if (len == 8 && aligned64(a)) {
605523f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj      zsm_sapply64_f__msmcread( thr, a );
6056f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      return;
6057f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   }
6058f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
6059f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   /* be completely general (but as efficient as possible) */
6060f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   if (len == 0) return;
6061f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
6062f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   if (!aligned16(a) && len >= 1) {
606323f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj      zsm_sapply08_f__msmcread( thr, a );
6064f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      a += 1;
6065f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      len -= 1;
6066f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      tl_assert(aligned16(a));
6067f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   }
6068f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   if (len == 0) return;
6069f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
6070f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   if (!aligned32(a) && len >= 2) {
607123f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj      zsm_sapply16_f__msmcread( thr, a );
6072f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      a += 2;
6073f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      len -= 2;
6074f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      tl_assert(aligned32(a));
6075f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   }
6076f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   if (len == 0) return;
6077f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
6078f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   if (!aligned64(a) && len >= 4) {
607923f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj      zsm_sapply32_f__msmcread( thr, a );
6080f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      a += 4;
6081f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      len -= 4;
6082f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      tl_assert(aligned64(a));
6083f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   }
6084f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   if (len == 0) return;
6085f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
6086f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   if (len >= 8) {
6087f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      tl_assert(aligned64(a));
6088f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      while (len >= 8) {
608923f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj         zsm_sapply64_f__msmcread( thr, a );
6090f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj         a += 8;
6091f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj         len -= 8;
6092f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      }
6093f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      tl_assert(aligned64(a));
6094f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   }
6095f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   if (len == 0) return;
6096f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
6097f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   if (len >= 4)
6098f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      tl_assert(aligned32(a));
6099f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   if (len >= 4) {
610023f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj      zsm_sapply32_f__msmcread( thr, a );
6101f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      a += 4;
6102f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      len -= 4;
6103f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   }
6104f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   if (len == 0) return;
6105f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
6106f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   if (len >= 2)
6107f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      tl_assert(aligned16(a));
6108f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   if (len >= 2) {
610923f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj      zsm_sapply16_f__msmcread( thr, a );
6110f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      a += 2;
6111f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      len -= 2;
6112f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   }
6113f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   if (len == 0) return;
6114f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
6115f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   if (len >= 1) {
611623f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj      zsm_sapply08_f__msmcread( thr, a );
61174c245e595b9f6300d3120408ca873f7115d9cc7dnjn      //a += 1;
6118f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      len -= 1;
6119f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   }
6120f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   tl_assert(len == 0);
6121f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj}
6122f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
612323f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardjvoid libhb_Thr_resumes ( Thr* thr )
6124f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj{
612523f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj   if (0) VG_(printf)("resume %p\n", thr);
61262d2ea2ff019a1c5b49b532541e30b22cae252b2dsewardj   tl_assert(thr);
6127ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj   tl_assert(!thr->llexit_done);
612823f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj   Filter__clear(thr->filter, "libhb_Thr_resumes");
612923f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj   /* A kludge, but .. if this thread doesn't have any marker stacks
613023f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj      at all, get one right now.  This is easier than figuring out
613123f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj      exactly when at thread startup we can and can't take a stack
613223f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj      snapshot. */
61332d2ea2ff019a1c5b49b532541e30b22cae252b2dsewardj   if (HG_(clo_history_level) == 1) {
61342d2ea2ff019a1c5b49b532541e30b22cae252b2dsewardj      tl_assert(thr->local_Kws_n_stacks);
61352d2ea2ff019a1c5b49b532541e30b22cae252b2dsewardj      if (VG_(sizeXA)( thr->local_Kws_n_stacks ) == 0)
61362d2ea2ff019a1c5b49b532541e30b22cae252b2dsewardj         note_local_Kw_n_stack_for(thr);
61372d2ea2ff019a1c5b49b532541e30b22cae252b2dsewardj   }
6138f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj}
6139f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
6140f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
6141f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj/////////////////////////////////////////////////////////
6142f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj//                                                     //
6143f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj// Synchronisation objects                             //
6144f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj//                                                     //
6145f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj/////////////////////////////////////////////////////////
6146f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
/* Head of a doubly-linked list of all the SOs, chained through their
   admin_prev / admin_next fields.  NULL when the list is empty.
   SO__Alloc pushes each new SO at the head; SO__Dealloc unlinks the
   SO being freed and moves the head along if necessary. */
SO* admin_SO = NULL;
6149f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
6150ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardjstatic SO* SO__Alloc ( void )
6151ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj{
6152f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   SO* so = HG_(zalloc)( "libhb.SO__Alloc.1", sizeof(SO) );
6153f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   so->viR   = VtsID_INVALID;
6154f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   so->viW   = VtsID_INVALID;
6155f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   so->magic = SO_MAGIC;
6156ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj   /* Add to double linked list */
6157ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj   if (admin_SO) {
6158ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj      tl_assert(admin_SO->admin_prev == NULL);
6159ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj      admin_SO->admin_prev = so;
6160ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj      so->admin_next = admin_SO;
6161ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj   } else {
6162ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj      so->admin_next = NULL;
6163ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj   }
6164ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj   so->admin_prev = NULL;
6165ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj   admin_SO = so;
6166ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj   /* */
6167f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   return so;
6168f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj}
6169ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj
6170ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardjstatic void SO__Dealloc ( SO* so )
6171ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj{
6172f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   tl_assert(so);
6173f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   tl_assert(so->magic == SO_MAGIC);
6174f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   if (so->viR == VtsID_INVALID) {
6175f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      tl_assert(so->viW == VtsID_INVALID);
6176f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   } else {
6177f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      tl_assert(so->viW != VtsID_INVALID);
6178f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      VtsID__rcdec(so->viR);
6179f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      VtsID__rcdec(so->viW);
6180f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   }
6181f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   so->magic = 0;
6182ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj   /* Del from double linked list */
6183ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj   if (so->admin_prev)
6184ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj      so->admin_prev->admin_next = so->admin_next;
6185ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj   if (so->admin_next)
6186ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj      so->admin_next->admin_prev = so->admin_prev;
6187ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj   if (so == admin_SO)
6188ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj      admin_SO = so->admin_next;
6189ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj   /* */
6190f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   HG_(free)( so );
6191f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj}
6192f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
6193f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
6194f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj/////////////////////////////////////////////////////////
6195f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj//                                                     //
6196f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj// Top Level API                                       //
6197f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj//                                                     //
6198f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj/////////////////////////////////////////////////////////
6199f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
62006bd9dc18c043927c1196caba20a327238a179c42florianstatic void show_thread_state ( const HChar* str, Thr* t )
6201f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj{
6202f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   if (1) return;
6203f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   if (t->viR == t->viW) {
6204f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      VG_(printf)("thr \"%s\" %p has vi* %u==", str, t, t->viR );
6205f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      VtsID__pp( t->viR );
6206f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      VG_(printf)("%s","\n");
6207f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   } else {
6208f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      VG_(printf)("thr \"%s\" %p has viR %u==", str, t, t->viR );
6209f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      VtsID__pp( t->viR );
6210f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      VG_(printf)(" viW %u==", t->viW);
6211f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      VtsID__pp( t->viW );
6212f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      VG_(printf)("%s","\n");
6213f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   }
6214f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj}
6215f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
6216f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
6217f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardjThr* libhb_init (
6218f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj        void        (*get_stacktrace)( Thr*, Addr*, UWord ),
6219d52392dd8f0536931cb0bbf2a27f1621704f32ebsewardj        ExeContext* (*get_EC)( Thr* )
6220f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj     )
6221f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj{
6222f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   Thr*  thr;
6223f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   VtsID vi;
6224e4cce74e68f351f1dd78ba7a5a412966f8d7702csewardj
6225e4cce74e68f351f1dd78ba7a5a412966f8d7702csewardj   // We will have to have to store a large number of these,
6226e4cce74e68f351f1dd78ba7a5a412966f8d7702csewardj   // so make sure they're the size we expect them to be.
6227328d6627c26471332610da3f5a0b9cc3cdd410c7philippe   STATIC_ASSERT(sizeof(ScalarTS) == 8);
6228ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj
6229ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj   /* because first 1024 unusable */
6230328d6627c26471332610da3f5a0b9cc3cdd410c7philippe   STATIC_ASSERT(SCALARTS_N_THRBITS >= 11);
6231328d6627c26471332610da3f5a0b9cc3cdd410c7philippe   /* so as to fit in a UInt w/ 5 bits to spare (see defn of
6232328d6627c26471332610da3f5a0b9cc3cdd410c7philippe      Thr_n_RCEC and TSW). */
6233328d6627c26471332610da3f5a0b9cc3cdd410c7philippe   STATIC_ASSERT(SCALARTS_N_THRBITS <= 27);
6234ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj
6235ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj   /* Need to be sure that Thr_n_RCEC is 2 words (64-bit) or 3 words
6236ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj      (32-bit).  It's not correctness-critical, but there are a lot of
6237ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj      them, so it's important from a space viewpoint.  Unfortunately
6238ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj      we simply can't pack it into 2 words on a 32-bit target. */
6239328d6627c26471332610da3f5a0b9cc3cdd410c7philippe   STATIC_ASSERT(   (sizeof(UWord) == 8 && sizeof(Thr_n_RCEC) == 16)
6240328d6627c26471332610da3f5a0b9cc3cdd410c7philippe                 || (sizeof(UWord) == 4 && sizeof(Thr_n_RCEC) == 12));
6241328d6627c26471332610da3f5a0b9cc3cdd410c7philippe   STATIC_ASSERT(sizeof(TSW) == sizeof(UInt));
6242ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj
6243ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj   /* Word sets really are 32 bits.  Even on a 64 bit target. */
6244328d6627c26471332610da3f5a0b9cc3cdd410c7philippe   STATIC_ASSERT(sizeof(WordSetID) == 4);
6245328d6627c26471332610da3f5a0b9cc3cdd410c7philippe   STATIC_ASSERT(sizeof(WordSet) == sizeof(WordSetID));
6246e4cce74e68f351f1dd78ba7a5a412966f8d7702csewardj
6247f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   tl_assert(get_stacktrace);
6248f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   tl_assert(get_EC);
6249f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   main_get_stacktrace   = get_stacktrace;
6250f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   main_get_EC           = get_EC;
6251f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
6252f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   // No need to initialise hg_wordfm.
6253f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   // No need to initialise hg_wordset.
6254f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
62557aa38a97a2a49163e465a7c85b8b9469f5c7d8e2sewardj   /* Allocated once and never deallocated.  Used as a temporary in
62567aa38a97a2a49163e465a7c85b8b9469f5c7d8e2sewardj      VTS singleton, tick and join operations. */
62577aa38a97a2a49163e465a7c85b8b9469f5c7d8e2sewardj   temp_max_sized_VTS = VTS__new( "libhb.libhb_init.1", ThrID_MAX_VALID );
62587aa38a97a2a49163e465a7c85b8b9469f5c7d8e2sewardj   temp_max_sized_VTS->id = VtsID_INVALID;
6259c3508652c3a00c0d0035603a7d738f2fe47e9331philippe   verydead_thread_tables_init();
6260f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   vts_set_init();
6261f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   vts_tab_init();
6262f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   event_map_init();
6263f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   VtsID__invalidate_caches();
6264f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
6265f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   // initialise shadow memory
62661475a7ffc4f60e78d402ba2572f1624cebbbb76aphilippe   zsm_init( );
6267f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
6268f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   thr = Thr__new();
6269f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   vi  = VtsID__mk_Singleton( thr, 1 );
6270f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   thr->viR = vi;
6271f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   thr->viW = vi;
6272f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   VtsID__rcinc(thr->viR);
6273f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   VtsID__rcinc(thr->viW);
6274f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
6275f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   show_thread_state("  root", thr);
6276f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   return thr;
6277f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj}
6278f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
627923f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj
6280f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardjThr* libhb_create ( Thr* parent )
6281f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj{
6282f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   /* The child's VTSs are copies of the parent's VTSs, but ticked at
6283f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      the child's index.  Since the child's index is guaranteed
6284f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      unique, it has never been seen before, so the implicit value
6285f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      before the tick is zero and after that is one. */
6286f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   Thr* child = Thr__new();
6287f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
6288f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   child->viR = VtsID__tick( parent->viR, child );
6289f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   child->viW = VtsID__tick( parent->viW, child );
629023f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj   Filter__clear(child->filter, "libhb_create(child)");
6291f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   VtsID__rcinc(child->viR);
6292f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   VtsID__rcinc(child->viW);
62938ab2c1303922dd85467e00cb7b5594d73b7043c6sewardj   /* We need to do note_local_Kw_n_stack_for( child ), but it's too
629423f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj      early for that - it may not have a valid TId yet.  So, let
629523f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj      libhb_Thr_resumes pick it up the first time the thread runs. */
6296f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
6297f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   tl_assert(VtsID__indexAt( child->viR, child ) == 1);
6298f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   tl_assert(VtsID__indexAt( child->viW, child ) == 1);
6299f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
6300f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   /* and the parent has to move along too */
6301f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   VtsID__rcdec(parent->viR);
6302f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   VtsID__rcdec(parent->viW);
6303f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   parent->viR = VtsID__tick( parent->viR, parent );
6304f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   parent->viW = VtsID__tick( parent->viW, parent );
630523f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj   Filter__clear(parent->filter, "libhb_create(parent)");
6306f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   VtsID__rcinc(parent->viR);
6307f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   VtsID__rcinc(parent->viW);
63088ab2c1303922dd85467e00cb7b5594d73b7043c6sewardj   note_local_Kw_n_stack_for( parent );
6309f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
6310f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   show_thread_state(" child", child);
6311f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   show_thread_state("parent", parent);
6312f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
6313f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   return child;
6314f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj}
6315f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
/* Shut down the library, and print stats (in fact that's _all_
   this is for). */
6318f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardjvoid libhb_shutdown ( Bool show_stats )
6319f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj{
6320f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   if (show_stats) {
6321f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      VG_(printf)("%s","<<< BEGIN libhb stats >>>\n");
6322f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      VG_(printf)(" secmaps: %'10lu allocd (%'12lu g-a-range)\n",
6323f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj                  stats__secmaps_allocd,
6324f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj                  stats__secmap_ga_space_covered);
6325f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      VG_(printf)("  linesZ: %'10lu allocd (%'12lu bytes occupied)\n",
6326f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj                  stats__secmap_linesZ_allocd,
6327f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj                  stats__secmap_linesZ_bytes);
63280fb30ace7cd9e63623c4ed5dd720f18b0ea9dee3philippe      VG_(printf)("  linesF: %'10lu allocd (%'12lu bytes occupied)"
63290fb30ace7cd9e63623c4ed5dd720f18b0ea9dee3philippe                  " (%'10lu used)\n",
633071ed3c9382fa816a84731e9a2ddb9eda7d5624a6philippe                  VG_(sizePA) (LineF_pool_allocator),
633171ed3c9382fa816a84731e9a2ddb9eda7d5624a6philippe                  VG_(sizePA) (LineF_pool_allocator) * sizeof(LineF),
63320fb30ace7cd9e63623c4ed5dd720f18b0ea9dee3philippe                  shmem__SecMap_used_linesF());
6333f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe      VG_(printf)(" secmaps: %'10lu in map (can be scanGCed %'5lu)"
6334f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe                  " #%lu scanGC \n",
6335f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe                  stats__secmaps_in_map_shmem,
6336f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe                  shmem__SecMap_do_GC(False /* really do GC */),
6337f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe                  stats__secmaps_scanGC);
6338f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe      tl_assert (VG_(sizeFM) (map_shmem) == stats__secmaps_in_map_shmem);
6339f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe      VG_(printf)(" secmaps: %'10lu in freelist,"
6340f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe                  " total (scanGCed %'lu, ssetGCed %'lu)\n",
6341f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe                  SecMap_freelist_length(),
6342f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe                  stats__secmaps_scanGCed,
6343f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe                  stats__secmaps_ssetGCed);
6344f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      VG_(printf)(" secmaps: %'10lu searches (%'12lu slow)\n",
6345f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj                  stats__secmaps_search, stats__secmaps_search_slow);
6346f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
6347f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      VG_(printf)("%s","\n");
6348f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      VG_(printf)("   cache: %'lu totrefs (%'lu misses)\n",
6349f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj                  stats__cache_totrefs, stats__cache_totmisses );
6350f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      VG_(printf)("   cache: %'14lu Z-fetch,    %'14lu F-fetch\n",
6351f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj                  stats__cache_Z_fetches, stats__cache_F_fetches );
6352f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      VG_(printf)("   cache: %'14lu Z-wback,    %'14lu F-wback\n",
6353f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj                  stats__cache_Z_wbacks, stats__cache_F_wbacks );
6354f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe      VG_(printf)("   cache: %'14lu flushes_invals\n",
6355f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe                  stats__cache_flushes_invals );
6356f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      VG_(printf)("   cache: %'14llu arange_New  %'14llu direct-to-Zreps\n",
6357f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj                  stats__cache_make_New_arange,
6358f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj                  stats__cache_make_New_inZrep);
6359f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
6360f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      VG_(printf)("%s","\n");
6361f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      VG_(printf)("   cline: %'10lu normalises\n",
6362f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj                  stats__cline_normalises );
636323f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj      VG_(printf)("   cline: c rds 8/4/2/1: %'13lu %'13lu %'13lu %'13lu\n",
636423f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj                  stats__cline_cread64s,
636523f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj                  stats__cline_cread32s,
636623f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj                  stats__cline_cread16s,
636723f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj                  stats__cline_cread08s );
636823f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj      VG_(printf)("   cline: c wrs 8/4/2/1: %'13lu %'13lu %'13lu %'13lu\n",
636923f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj                  stats__cline_cwrite64s,
637023f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj                  stats__cline_cwrite32s,
637123f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj                  stats__cline_cwrite16s,
637223f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj                  stats__cline_cwrite08s );
637323f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj      VG_(printf)("   cline: s wrs 8/4/2/1: %'13lu %'13lu %'13lu %'13lu\n",
637423f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj                  stats__cline_swrite64s,
637523f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj                  stats__cline_swrite32s,
637623f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj                  stats__cline_swrite16s,
637723f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj                  stats__cline_swrite08s );
637823f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj      VG_(printf)("   cline: s rd1s %'lu, s copy1s %'lu\n",
637923f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj                  stats__cline_sread08s, stats__cline_scopy08s );
6380f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe      VG_(printf)("   cline:    splits: 8to4 %'12lu    4to2 %'12lu"
6381f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe                  "    2to1 %'12lu\n",
6382f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe                  stats__cline_64to32splits, stats__cline_32to16splits,
6383f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe                  stats__cline_16to8splits );
6384f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe      VG_(printf)("   cline: pulldowns: 8to4 %'12lu    4to2 %'12lu"
6385f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe                  "    2to1 %'12lu\n",
6386f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe                  stats__cline_64to32pulldown, stats__cline_32to16pulldown,
6387f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe                  stats__cline_16to8pulldown );
6388f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      if (0)
6389f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe      VG_(printf)("   cline: sizeof(CacheLineZ) %ld,"
6390f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe                  " covers %ld bytes of arange\n",
6391f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe                  (Word)sizeof(LineZ),
6392f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe                  (Word)N_LINE_ARANGE);
6393f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
6394f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      VG_(printf)("%s","\n");
6395f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
6396c8028add6294793dfc80a80d920c7dba3a89f312sewardj      VG_(printf)("   libhb: %'13llu msmcread  (%'llu dragovers)\n",
639723f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj                  stats__msmcread, stats__msmcread_change);
6398c8028add6294793dfc80a80d920c7dba3a89f312sewardj      VG_(printf)("   libhb: %'13llu msmcwrite (%'llu dragovers)\n",
639923f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj                  stats__msmcwrite, stats__msmcwrite_change);
640023f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj      VG_(printf)("   libhb: %'13llu cmpLEQ queries (%'llu misses)\n",
640123f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj                  stats__cmpLEQ_queries, stats__cmpLEQ_misses);
6402f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      VG_(printf)("   libhb: %'13llu join2  queries (%'llu misses)\n",
6403f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj                  stats__join2_queries, stats__join2_misses);
6404f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
6405f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      VG_(printf)("%s","\n");
6406f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe      VG_(printf)("   libhb: VTSops: tick %'lu,  join %'lu,  cmpLEQ %'lu\n",
6407f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe                  stats__vts__tick, stats__vts__join,  stats__vts__cmpLEQ );
6408f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe      VG_(printf)("   libhb: VTSops: cmp_structural %'lu (%'lu slow)\n",
6409f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe                  stats__vts__cmp_structural, stats__vts__cmp_structural_slow);
6410f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe      VG_(printf)("   libhb: VTSset: find__or__clone_and_add %'lu"
6411f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe                  " (%'lu allocd)\n",
64127aa38a97a2a49163e465a7c85b8b9469f5c7d8e2sewardj                   stats__vts_set__focaa, stats__vts_set__focaa_a );
6413c8028add6294793dfc80a80d920c7dba3a89f312sewardj      VG_(printf)( "   libhb: VTSops: indexAt_SLOW %'lu\n",
6414c8028add6294793dfc80a80d920c7dba3a89f312sewardj                   stats__vts__indexat_slow );
6415c8028add6294793dfc80a80d920c7dba3a89f312sewardj
6416c8028add6294793dfc80a80d920c7dba3a89f312sewardj      VG_(printf)("%s","\n");
6417f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      VG_(printf)(
6418f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj         "   libhb: %ld entries in vts_table (approximately %lu bytes)\n",
6419f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj         VG_(sizeXA)( vts_tab ), VG_(sizeXA)( vts_tab ) * sizeof(VtsTE)
6420f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      );
64212bd2326e8361a752dfbd4eced9a61b6224f05272philippe      VG_(printf)("   libhb: #%lu vts_tab GC    #%lu vts pruning\n",
64222bd2326e8361a752dfbd4eced9a61b6224f05272philippe                  stats__vts_tab_GC, stats__vts_pruning);
6423f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      VG_(printf)( "   libhb: %lu entries in vts_set\n",
6424f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj                   VG_(sizeFM)( vts_set ) );
6425f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
6426f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      VG_(printf)("%s","\n");
6427900c5357f377ae0cf0dba255e660b1c2df5d93bcphilippe      {
6428900c5357f377ae0cf0dba255e660b1c2df5d93bcphilippe         UInt live = 0;
6429900c5357f377ae0cf0dba255e660b1c2df5d93bcphilippe         UInt llexit_done = 0;
6430900c5357f377ae0cf0dba255e660b1c2df5d93bcphilippe         UInt joinedwith_done = 0;
6431900c5357f377ae0cf0dba255e660b1c2df5d93bcphilippe         UInt llexit_and_joinedwith_done = 0;
6432900c5357f377ae0cf0dba255e660b1c2df5d93bcphilippe
6433900c5357f377ae0cf0dba255e660b1c2df5d93bcphilippe         Thread* hgthread = get_admin_threads();
6434900c5357f377ae0cf0dba255e660b1c2df5d93bcphilippe         tl_assert(hgthread);
6435900c5357f377ae0cf0dba255e660b1c2df5d93bcphilippe         while (hgthread) {
6436900c5357f377ae0cf0dba255e660b1c2df5d93bcphilippe            Thr* hbthr = hgthread->hbthr;
6437900c5357f377ae0cf0dba255e660b1c2df5d93bcphilippe            tl_assert(hbthr);
6438900c5357f377ae0cf0dba255e660b1c2df5d93bcphilippe            if (hbthr->llexit_done && hbthr->joinedwith_done)
6439900c5357f377ae0cf0dba255e660b1c2df5d93bcphilippe               llexit_and_joinedwith_done++;
6440900c5357f377ae0cf0dba255e660b1c2df5d93bcphilippe            else if (hbthr->llexit_done)
6441900c5357f377ae0cf0dba255e660b1c2df5d93bcphilippe               llexit_done++;
6442900c5357f377ae0cf0dba255e660b1c2df5d93bcphilippe            else if (hbthr->joinedwith_done)
6443900c5357f377ae0cf0dba255e660b1c2df5d93bcphilippe               joinedwith_done++;
6444900c5357f377ae0cf0dba255e660b1c2df5d93bcphilippe            else
6445900c5357f377ae0cf0dba255e660b1c2df5d93bcphilippe               live++;
6446900c5357f377ae0cf0dba255e660b1c2df5d93bcphilippe            hgthread = hgthread->admin;
6447900c5357f377ae0cf0dba255e660b1c2df5d93bcphilippe         }
64485e5cb009574352880f1bc530e1a73ddaae5003fcflorian         VG_(printf)("   libhb: threads live: %u exit_and_joinedwith %u"
64495e5cb009574352880f1bc530e1a73ddaae5003fcflorian                     " exit %u joinedwith %u\n",
6450900c5357f377ae0cf0dba255e660b1c2df5d93bcphilippe                     live, llexit_and_joinedwith_done,
6451900c5357f377ae0cf0dba255e660b1c2df5d93bcphilippe                     llexit_done, joinedwith_done);
6452c3508652c3a00c0d0035603a7d738f2fe47e9331philippe         VG_(printf)("   libhb: %d verydead_threads, "
6453c3508652c3a00c0d0035603a7d738f2fe47e9331philippe                     "%d verydead_threads_not_pruned\n",
6454c3508652c3a00c0d0035603a7d738f2fe47e9331philippe                     (int) VG_(sizeXA)( verydead_thread_table),
6455c3508652c3a00c0d0035603a7d738f2fe47e9331philippe                     (int) VG_(sizeXA)( verydead_thread_table_not_pruned));
6456c3508652c3a00c0d0035603a7d738f2fe47e9331philippe         tl_assert (VG_(sizeXA)( verydead_thread_table)
6457c3508652c3a00c0d0035603a7d738f2fe47e9331philippe                    + VG_(sizeXA)( verydead_thread_table_not_pruned)
6458c3508652c3a00c0d0035603a7d738f2fe47e9331philippe                    == llexit_and_joinedwith_done);
6459900c5357f377ae0cf0dba255e660b1c2df5d93bcphilippe      }
6460900c5357f377ae0cf0dba255e660b1c2df5d93bcphilippe
6461900c5357f377ae0cf0dba255e660b1c2df5d93bcphilippe      VG_(printf)("%s","\n");
6462328d6627c26471332610da3f5a0b9cc3cdd410c7philippe      VG_(printf)( "   libhb: oldrefHTN %lu (%'d bytes)\n",
6463328d6627c26471332610da3f5a0b9cc3cdd410c7philippe                   oldrefHTN, (int)(oldrefHTN * sizeof(OldRef)));
6464328d6627c26471332610da3f5a0b9cc3cdd410c7philippe      tl_assert (oldrefHTN == VG_(HT_count_nodes) (oldrefHT));
6465328d6627c26471332610da3f5a0b9cc3cdd410c7philippe      VG_(printf)( "   libhb: oldref lookup found=%lu notfound=%lu\n",
6466328d6627c26471332610da3f5a0b9cc3cdd410c7philippe                   stats__evm__lookup_found, stats__evm__lookup_notfound);
6467328d6627c26471332610da3f5a0b9cc3cdd410c7philippe      if (VG_(clo_verbosity) > 1)
6468328d6627c26471332610da3f5a0b9cc3cdd410c7philippe         VG_(HT_print_stats) (oldrefHT, cmp_oldref_tsw);
6469328d6627c26471332610da3f5a0b9cc3cdd410c7philippe      VG_(printf)( "   libhb: oldref bind tsw/rcec "
64703a085bf78a78ddddbe4b237da3d200ca502fe51ephilippe                   "==/==:%'lu ==/!=:%'lu !=/!=:%'lu\n",
6471328d6627c26471332610da3f5a0b9cc3cdd410c7philippe                   stats__ctxt_eq_tsw_eq_rcec, stats__ctxt_eq_tsw_neq_rcec,
6472328d6627c26471332610da3f5a0b9cc3cdd410c7philippe                   stats__ctxt_neq_tsw_neq_rcec);
64733a085bf78a78ddddbe4b237da3d200ca502fe51ephilippe      VG_(printf)( "   libhb: ctxt__rcdec calls %'lu. rcec gc discards %'lu\n",
6474328d6627c26471332610da3f5a0b9cc3cdd410c7philippe                   stats__ctxt_rcdec_calls, stats__ctxt_rcec_gc_discards);
6475cabdbb5cab3740c7082e44b770a582c8186888e9philippe      VG_(printf)( "   libhb: contextTab: %lu slots,"
6476cabdbb5cab3740c7082e44b770a582c8186888e9philippe                   " %lu cur ents(ref'd %lu),"
647706bc23a809d42c80db585b2f8ca2ea38fbe80a04philippe                   " %lu max ents\n",
6478f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj                   (UWord)N_RCEC_TAB,
6479cabdbb5cab3740c7082e44b770a582c8186888e9philippe                   stats__ctxt_tab_curr, RCEC_referenced,
6480cabdbb5cab3740c7082e44b770a582c8186888e9philippe                   stats__ctxt_tab_max );
648147124e91145f71f6db7d0a60031fe49a6b6ea141philippe      {
648247124e91145f71f6db7d0a60031fe49a6b6ea141philippe#        define  MAXCHAIN 10
648347124e91145f71f6db7d0a60031fe49a6b6ea141philippe         UInt chains[MAXCHAIN+1]; // [MAXCHAIN] gets all chains >= MAXCHAIN
648447124e91145f71f6db7d0a60031fe49a6b6ea141philippe         UInt non0chain = 0;
648547124e91145f71f6db7d0a60031fe49a6b6ea141philippe         UInt n;
648647124e91145f71f6db7d0a60031fe49a6b6ea141philippe         UInt i;
648747124e91145f71f6db7d0a60031fe49a6b6ea141philippe         RCEC *p;
648847124e91145f71f6db7d0a60031fe49a6b6ea141philippe
648947124e91145f71f6db7d0a60031fe49a6b6ea141philippe         for (i = 0; i <= MAXCHAIN; i++) chains[i] = 0;
649047124e91145f71f6db7d0a60031fe49a6b6ea141philippe         for (i = 0; i < N_RCEC_TAB; i++) {
649147124e91145f71f6db7d0a60031fe49a6b6ea141philippe            n = 0;
649247124e91145f71f6db7d0a60031fe49a6b6ea141philippe            for (p = contextTab[i]; p; p = p->next)
649347124e91145f71f6db7d0a60031fe49a6b6ea141philippe               n++;
649447124e91145f71f6db7d0a60031fe49a6b6ea141philippe            if (n < MAXCHAIN)
649547124e91145f71f6db7d0a60031fe49a6b6ea141philippe               chains[n]++;
649647124e91145f71f6db7d0a60031fe49a6b6ea141philippe            else
649747124e91145f71f6db7d0a60031fe49a6b6ea141philippe               chains[MAXCHAIN]++;
649847124e91145f71f6db7d0a60031fe49a6b6ea141philippe            if (n > 0)
649947124e91145f71f6db7d0a60031fe49a6b6ea141philippe               non0chain++;
650047124e91145f71f6db7d0a60031fe49a6b6ea141philippe         }
650147124e91145f71f6db7d0a60031fe49a6b6ea141philippe         VG_(printf)( "   libhb: contextTab chain of [length]=nchain."
650247124e91145f71f6db7d0a60031fe49a6b6ea141philippe                      " Avg chain len %3.1f\n"
650347124e91145f71f6db7d0a60031fe49a6b6ea141philippe                      "        ",
650447124e91145f71f6db7d0a60031fe49a6b6ea141philippe                      (Double)stats__ctxt_tab_curr
650547124e91145f71f6db7d0a60031fe49a6b6ea141philippe                      / (Double)(non0chain ? non0chain : 1));
650647124e91145f71f6db7d0a60031fe49a6b6ea141philippe         for (i = 0; i <= MAXCHAIN; i++) {
650747124e91145f71f6db7d0a60031fe49a6b6ea141philippe            if (chains[i] != 0)
65085e5cb009574352880f1bc530e1a73ddaae5003fcflorian                VG_(printf)( "[%u%s]=%u ",
650947124e91145f71f6db7d0a60031fe49a6b6ea141philippe                             i, i == MAXCHAIN ? "+" : "",
651047124e91145f71f6db7d0a60031fe49a6b6ea141philippe                             chains[i]);
651147124e91145f71f6db7d0a60031fe49a6b6ea141philippe         }
651247124e91145f71f6db7d0a60031fe49a6b6ea141philippe         VG_(printf)( "\n");
651347124e91145f71f6db7d0a60031fe49a6b6ea141philippe#        undef MAXCHAIN
651447124e91145f71f6db7d0a60031fe49a6b6ea141philippe      }
6515f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      VG_(printf)( "   libhb: contextTab: %lu queries, %lu cmps\n",
6516f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj                   stats__ctxt_tab_qs,
6517f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj                   stats__ctxt_tab_cmps );
6518f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj#if 0
6519f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      VG_(printf)("sizeof(AvlNode)     = %lu\n", sizeof(AvlNode));
6520f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      VG_(printf)("sizeof(WordBag)     = %lu\n", sizeof(WordBag));
6521f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      VG_(printf)("sizeof(MaybeWord)   = %lu\n", sizeof(MaybeWord));
6522f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      VG_(printf)("sizeof(CacheLine)   = %lu\n", sizeof(CacheLine));
6523f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      VG_(printf)("sizeof(LineZ)       = %lu\n", sizeof(LineZ));
6524f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      VG_(printf)("sizeof(LineF)       = %lu\n", sizeof(LineF));
6525f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      VG_(printf)("sizeof(SecMap)      = %lu\n", sizeof(SecMap));
6526f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      VG_(printf)("sizeof(Cache)       = %lu\n", sizeof(Cache));
6527f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      VG_(printf)("sizeof(SMCacheEnt)  = %lu\n", sizeof(SMCacheEnt));
6528f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      VG_(printf)("sizeof(CountedSVal) = %lu\n", sizeof(CountedSVal));
6529f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      VG_(printf)("sizeof(VTS)         = %lu\n", sizeof(VTS));
6530f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      VG_(printf)("sizeof(ScalarTS)    = %lu\n", sizeof(ScalarTS));
6531f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      VG_(printf)("sizeof(VtsTE)       = %lu\n", sizeof(VtsTE));
6532f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      VG_(printf)("sizeof(MSMInfo)     = %lu\n", sizeof(MSMInfo));
6533f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
6534f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      VG_(printf)("sizeof(struct _XArray)     = %lu\n", sizeof(struct _XArray));
6535f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      VG_(printf)("sizeof(struct _WordFM)     = %lu\n", sizeof(struct _WordFM));
6536f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      VG_(printf)("sizeof(struct _Thr)     = %lu\n", sizeof(struct _Thr));
6537f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      VG_(printf)("sizeof(struct _SO)     = %lu\n", sizeof(struct _SO));
6538f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj#endif
6539f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
6540f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      VG_(printf)("%s","<<< END libhb stats >>>\n");
6541f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      VG_(printf)("%s","\n");
6542f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
6543f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   }
6544f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj}
6545f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
6546ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj/* Receive notification that a thread has low level exited.  The
6547ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj   significance here is that we do not expect to see any more memory
6548ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj   references from it. */
6549f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardjvoid libhb_async_exit ( Thr* thr )
6550f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj{
655123f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj   tl_assert(thr);
6552ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj   tl_assert(!thr->llexit_done);
6553ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj   thr->llexit_done = True;
65542d2ea2ff019a1c5b49b532541e30b22cae252b2dsewardj
65552d2ea2ff019a1c5b49b532541e30b22cae252b2dsewardj   /* free up Filter and local_Kws_n_stacks (well, actually not the
65562d2ea2ff019a1c5b49b532541e30b22cae252b2dsewardj      latter ..) */
65572d2ea2ff019a1c5b49b532541e30b22cae252b2dsewardj   tl_assert(thr->filter);
65582d2ea2ff019a1c5b49b532541e30b22cae252b2dsewardj   HG_(free)(thr->filter);
65592d2ea2ff019a1c5b49b532541e30b22cae252b2dsewardj   thr->filter = NULL;
65602d2ea2ff019a1c5b49b532541e30b22cae252b2dsewardj
6561ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj   /* Tell the VTS mechanism this thread has exited, so it can
6562ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj      participate in VTS pruning.  Note this can only happen if the
6563ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj      thread has both ll_exited and has been joined with. */
6564ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj   if (thr->joinedwith_done)
6565ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj      VTS__declare_thread_very_dead(thr);
6566ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj
65672d2ea2ff019a1c5b49b532541e30b22cae252b2dsewardj   /* Another space-accuracy tradeoff.  Do we want to be able to show
65682d2ea2ff019a1c5b49b532541e30b22cae252b2dsewardj      H1 history for conflicts in threads which have since exited?  If
65692d2ea2ff019a1c5b49b532541e30b22cae252b2dsewardj      yes, then we better not free up thr->local_Kws_n_stacks.  The
65702d2ea2ff019a1c5b49b532541e30b22cae252b2dsewardj      downside is a potential per-thread leak of up to
65712d2ea2ff019a1c5b49b532541e30b22cae252b2dsewardj      N_KWs_N_STACKs_PER_THREAD * sizeof(ULong_n_EC) * whatever the
65722d2ea2ff019a1c5b49b532541e30b22cae252b2dsewardj      XArray average overcommit factor is (1.5 I'd guess). */
65732d2ea2ff019a1c5b49b532541e30b22cae252b2dsewardj   // hence:
65742d2ea2ff019a1c5b49b532541e30b22cae252b2dsewardj   // VG_(deleteXA)(thr->local_Kws_n_stacks);
65752d2ea2ff019a1c5b49b532541e30b22cae252b2dsewardj   // thr->local_Kws_n_stacks = NULL;
6576f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj}
6577f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
6578ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj/* Receive notification that a thread has been joined with.  The
6579ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj   significance here is that we do not expect to see any further
6580ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj   references to its vector clocks (Thr::viR and Thr::viW). */
6581ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardjvoid libhb_joinedwith_done ( Thr* thr )
6582ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj{
6583ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj   tl_assert(thr);
6584ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj   /* Caller must ensure that this is only ever called once per Thr. */
6585ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj   tl_assert(!thr->joinedwith_done);
6586ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj   thr->joinedwith_done = True;
6587ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj   if (thr->llexit_done)
6588ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj      VTS__declare_thread_very_dead(thr);
6589ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj}
6590ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj
6591ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj
6592f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj/* Both Segs and SOs point to VTSs.  However, there is no sharing, so
6593f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   a Seg that points at a VTS is its one-and-only owner, and ditto for
6594f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   a SO that points at a VTS. */
6595f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
6596f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardjSO* libhb_so_alloc ( void )
6597f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj{
6598f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   return SO__Alloc();
6599f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj}
6600f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
6601f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardjvoid libhb_so_dealloc ( SO* so )
6602f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj{
6603f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   tl_assert(so);
6604f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   tl_assert(so->magic == SO_MAGIC);
6605f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   SO__Dealloc(so);
6606f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj}
6607f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
6608f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj/* See comments in libhb.h for details on the meaning of
6609f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   strong vs weak sends and strong vs weak receives. */
6610f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardjvoid libhb_so_send ( Thr* thr, SO* so, Bool strong_send )
6611f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj{
6612f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   /* Copy the VTSs from 'thr' into the sync object, and then move
6613f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      the thread along one step. */
6614f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
6615f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   tl_assert(so);
6616f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   tl_assert(so->magic == SO_MAGIC);
6617f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
6618f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   /* stay sane .. a thread's read-clock must always lead or be the
6619f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      same as its write-clock */
662023f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj   { Bool leq = VtsID__cmpLEQ(thr->viW, thr->viR);
662123f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj     tl_assert(leq);
6622f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   }
6623f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
6624f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   /* since we're overwriting the VtsIDs in the SO, we need to drop
6625f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      any references made by the previous contents thereof */
6626f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   if (so->viR == VtsID_INVALID) {
6627f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      tl_assert(so->viW == VtsID_INVALID);
6628f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      so->viR = thr->viR;
6629f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      so->viW = thr->viW;
6630f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      VtsID__rcinc(so->viR);
6631f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      VtsID__rcinc(so->viW);
6632f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   } else {
6633f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      /* In a strong send, we dump any previous VC in the SO and
6634f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj         install the sending thread's VC instead.  For a weak send we
6635f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj         must join2 with what's already there. */
6636f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      tl_assert(so->viW != VtsID_INVALID);
6637f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      VtsID__rcdec(so->viR);
6638f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      VtsID__rcdec(so->viW);
6639f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      so->viR = strong_send ? thr->viR : VtsID__join2( so->viR, thr->viR );
6640f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      so->viW = strong_send ? thr->viW : VtsID__join2( so->viW, thr->viW );
6641f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      VtsID__rcinc(so->viR);
6642f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      VtsID__rcinc(so->viW);
6643f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   }
6644f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
6645f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   /* move both parent clocks along */
6646f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   VtsID__rcdec(thr->viR);
6647f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   VtsID__rcdec(thr->viW);
6648f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   thr->viR = VtsID__tick( thr->viR, thr );
6649f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   thr->viW = VtsID__tick( thr->viW, thr );
6650ffce8159a95134f0a2bc1cea3c3e6e265f096d9fsewardj   if (!thr->llexit_done) {
66512d2ea2ff019a1c5b49b532541e30b22cae252b2dsewardj      Filter__clear(thr->filter, "libhb_so_send");
66528ab2c1303922dd85467e00cb7b5594d73b7043c6sewardj      note_local_Kw_n_stack_for(thr);
66532d2ea2ff019a1c5b49b532541e30b22cae252b2dsewardj   }
6654f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   VtsID__rcinc(thr->viR);
6655f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   VtsID__rcinc(thr->viW);
665623f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj
6657f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   if (strong_send)
6658f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      show_thread_state("s-send", thr);
6659f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   else
6660f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      show_thread_state("w-send", thr);
6661f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj}
6662f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
6663f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardjvoid libhb_so_recv ( Thr* thr, SO* so, Bool strong_recv )
6664f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj{
6665f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   tl_assert(so);
6666f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   tl_assert(so->magic == SO_MAGIC);
6667f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
6668f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   if (so->viR != VtsID_INVALID) {
6669f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      tl_assert(so->viW != VtsID_INVALID);
6670f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
6671f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      /* Weak receive (basically, an R-acquisition of a R-W lock).
6672f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj         This advances the read-clock of the receiver, but not the
6673f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj         write-clock. */
6674f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      VtsID__rcdec(thr->viR);
6675f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      thr->viR = VtsID__join2( thr->viR, so->viR );
6676f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      VtsID__rcinc(thr->viR);
6677f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
667890eb22e2423a164cabb7a0632a8fc1d1bc8d31a4sewardj      /* At one point (r10589) it seemed safest to tick the clocks for
667990eb22e2423a164cabb7a0632a8fc1d1bc8d31a4sewardj         the receiving thread after the join.  But on reflection, I
668090eb22e2423a164cabb7a0632a8fc1d1bc8d31a4sewardj         wonder if that might cause it to 'overtake' constraints,
668190eb22e2423a164cabb7a0632a8fc1d1bc8d31a4sewardj         which could lead to missing races.  So, back out that part of
668290eb22e2423a164cabb7a0632a8fc1d1bc8d31a4sewardj         r10589. */
668390eb22e2423a164cabb7a0632a8fc1d1bc8d31a4sewardj      //VtsID__rcdec(thr->viR);
668490eb22e2423a164cabb7a0632a8fc1d1bc8d31a4sewardj      //thr->viR = VtsID__tick( thr->viR, thr );
668590eb22e2423a164cabb7a0632a8fc1d1bc8d31a4sewardj      //VtsID__rcinc(thr->viR);
668623f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj
6687f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      /* For a strong receive, we also advance the receiver's write
6688f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj         clock, which means the receive as a whole is essentially
6689f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj         equivalent to a W-acquisition of a R-W lock. */
6690f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      if (strong_recv) {
6691f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj         VtsID__rcdec(thr->viW);
6692f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj         thr->viW = VtsID__join2( thr->viW, so->viW );
6693f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj         VtsID__rcinc(thr->viW);
669423f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj
669590eb22e2423a164cabb7a0632a8fc1d1bc8d31a4sewardj         /* See comment just above, re r10589. */
669690eb22e2423a164cabb7a0632a8fc1d1bc8d31a4sewardj         //VtsID__rcdec(thr->viW);
669790eb22e2423a164cabb7a0632a8fc1d1bc8d31a4sewardj         //thr->viW = VtsID__tick( thr->viW, thr );
669890eb22e2423a164cabb7a0632a8fc1d1bc8d31a4sewardj         //VtsID__rcinc(thr->viW);
6699f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      }
6700f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
6701f4845dc31a3e7a283f669ffbaa4add31d70fd6c3sewardj      if (thr->filter)
6702f4845dc31a3e7a283f669ffbaa4add31d70fd6c3sewardj         Filter__clear(thr->filter, "libhb_so_recv");
67038ab2c1303922dd85467e00cb7b5594d73b7043c6sewardj      note_local_Kw_n_stack_for(thr);
670423f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj
6705f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      if (strong_recv)
6706f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj         show_thread_state("s-recv", thr);
6707f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      else
6708f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj         show_thread_state("w-recv", thr);
6709f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
6710f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   } else {
6711f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      tl_assert(so->viW == VtsID_INVALID);
6712f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      /* Deal with degenerate case: 'so' has no vts, so there has been
6713f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj         no message posted to it.  Just ignore this case. */
6714f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      show_thread_state("d-recv", thr);
6715f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   }
6716f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj}
6717f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
6718f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardjBool libhb_so_everSent ( SO* so )
6719f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj{
6720f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   if (so->viR == VtsID_INVALID) {
6721f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      tl_assert(so->viW == VtsID_INVALID);
6722f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      return False;
6723f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   } else {
6724f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      tl_assert(so->viW != VtsID_INVALID);
6725f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj      return True;
6726f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   }
6727f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj}
6728f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
6729f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj#define XXX1 0 // 0x67a106c
6730f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj#define XXX2 0
6731f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
673223f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardjstatic inline Bool TRACEME(Addr a, SizeT szB) {
6733f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   if (XXX1 && a <= XXX1 && XXX1 <= a+szB) return True;
6734f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   if (XXX2 && a <= XXX2 && XXX2 <= a+szB) return True;
6735f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   return False;
6736f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj}
67370c8a47ca278cba69e103c13a553c84aa05beffabflorianstatic void trace ( Thr* thr, Addr a, SizeT szB, const HChar* s )
67386bf3726ebf7a04ca48a5e6cb1ad7a3065054e54eflorian{
673923f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj  SVal sv = zsm_sread08(a);
6740f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj  VG_(printf)("thr %p (%#lx,%lu) %s: 0x%016llx ", thr,a,szB,s,sv);
6741f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj  show_thread_state("", thr);
6742f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj  VG_(printf)("%s","\n");
6743f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj}
6744f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
674523f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardjvoid libhb_srange_new ( Thr* thr, Addr a, SizeT szB )
6746f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj{
6747f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   SVal sv = SVal__mkC(thr->viW, thr->viW);
6748f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   tl_assert(is_sane_SVal_C(sv));
674923f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj   if (0 && TRACEME(a,szB)) trace(thr,a,szB,"nw-before");
675023f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj   zsm_sset_range( a, szB, sv );
675123f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj   Filter__clear_range( thr->filter, a, szB );
675223f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj   if (0 && TRACEME(a,szB)) trace(thr,a,szB,"nw-after ");
6753f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj}
6754f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
6755fd35d49d5799584a7fe41d2f535c1ba2dd5e17b4sewardjvoid libhb_srange_noaccess_NoFX ( Thr* thr, Addr a, SizeT szB )
6756f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj{
675723f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj   /* do nothing */
6758f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj}
6759f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
6760f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe
6761f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe/* Set the lines zix_start till zix_end to NOACCESS. */
6762f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippestatic void zsm_secmap_line_range_noaccess (SecMap *sm,
6763f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe                                            UInt zix_start, UInt zix_end)
6764f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe{
6765f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe   for (UInt lz = zix_start; lz <= zix_end; lz++) {
6766f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe      LineZ* lineZ;
6767f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe      lineZ = &sm->linesZ[lz];
6768f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe      if (lineZ->dict[0] != SVal_INVALID) {
6769f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe         rcdec_LineZ(lineZ);
677071ed3c9382fa816a84731e9a2ddb9eda7d5624a6philippe         lineZ->dict[0] = SVal_NOACCESS;
677171ed3c9382fa816a84731e9a2ddb9eda7d5624a6philippe         lineZ->dict[1] = lineZ->dict[2] = lineZ->dict[3] = SVal_INVALID;
6772f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe      } else {
677371ed3c9382fa816a84731e9a2ddb9eda7d5624a6philippe         clear_LineF_of_Z(lineZ);
6774f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe      }
6775f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe      for (UInt i = 0; i < N_LINE_ARANGE/4; i++)
6776f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe         lineZ->ix2s[i] = 0; /* all refer to dict[0] */
6777f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe   }
6778f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe}
6779f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe
6780f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe/* Set the given range to SVal_NOACCESS in-place in the secmap.
6781f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe   a must be cacheline aligned. len must be a multiple of a cacheline
6782f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe   and must be < N_SECMAP_ARANGE. */
6783f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippestatic void zsm_sset_range_noaccess_in_secmap(Addr a, SizeT len)
6784f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe{
6785f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe   tl_assert (is_valid_scache_tag (a));
6786f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe   tl_assert (0 == (len & (N_LINE_ARANGE - 1)));
6787f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe   tl_assert (len < N_SECMAP_ARANGE);
6788f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe
6789f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe   SecMap *sm1 = shmem__find_SecMap (a);
6790f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe   SecMap *sm2 = shmem__find_SecMap (a + len - 1);
6791f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe   UWord zix_start = shmem__get_SecMap_offset(a          ) >> N_LINE_BITS;
6792f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe   UWord zix_end   = shmem__get_SecMap_offset(a + len - 1) >> N_LINE_BITS;
6793f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe
6794f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe   if (sm1) {
6795f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe      if (CHECK_ZSM) tl_assert(is_sane_SecMap(sm1));
6796f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe      zsm_secmap_line_range_noaccess (sm1, zix_start,
6797f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe                                      sm1 == sm2 ? zix_end : N_SECMAP_ZLINES-1);
6798f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe   }
6799f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe   if (sm2 && sm1 != sm2) {
6800f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe      if (CHECK_ZSM) tl_assert(is_sane_SecMap(sm2));
6801f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe      zsm_secmap_line_range_noaccess (sm2, 0, zix_end);
6802f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe   }
6803f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe}
6804f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe
6805f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe/* Set the given address range to SVal_NOACCESS.
6806f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe   The SecMaps fully set to SVal_NOACCESS will be pushed in SecMap_freelist. */
6807f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippestatic void zsm_sset_range_noaccess (Addr addr, SizeT len)
6808f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe{
6809f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe   /*
6810f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe       BPC = Before, Partial Cacheline, = addr
6811f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe             (i.e. starting inside a cacheline/inside a SecMap)
6812f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe       BFC = Before, Full Cacheline(s), but not full SecMap
6813f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe             (i.e. starting inside a SecMap)
6814f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe       FSM = Full SecMap(s)
6815f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe             (i.e. starting a SecMap)
6816f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe       AFC = After, Full Cacheline(s), but not full SecMap
6817f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe             (i.e. first address after the full SecMap(s))
6818f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe       APC = After, Partial Cacheline, i.e. first address after the
6819f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe             full CacheLines).
6820f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe       ARE = After Range End = addr+len = first address not part of the range.
6821f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe
6822f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe       If addr     starts a Cacheline, then BPC == BFC.
6823f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe       If addr     starts a SecMap,    then BPC == BFC == FSM.
6824f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe       If addr+len starts a SecMap,    then APC == ARE == AFC
6825f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe       If addr+len starts a Cacheline, then APC == ARE
6826f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe   */
6827f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe   Addr ARE = addr + len;
6828f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe   Addr BPC = addr;
6829f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe   Addr BFC = ROUNDUP(BPC, N_LINE_ARANGE);
6830f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe   Addr FSM = ROUNDUP(BPC, N_SECMAP_ARANGE);
6831f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe   Addr AFC = ROUNDDN(ARE, N_SECMAP_ARANGE);
6832f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe   Addr APC = ROUNDDN(ARE, N_LINE_ARANGE);
6833f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe   SizeT Plen = len; // Plen will be split between the following:
6834f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe   SizeT BPClen;
6835f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe   SizeT BFClen;
6836f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe   SizeT FSMlen;
6837f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe   SizeT AFClen;
6838f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe   SizeT APClen;
6839f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe
6840f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe   /* Consumes from Plen the nr of bytes between from and to.
6841f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe      from and to must be aligned on a multiple of round.
6842f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe      The length consumed will be a multiple of round, with
6843f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe      a maximum of Plen. */
6844f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe#  define PlenCONSUME(from, to, round, consumed) \
6845f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe   do {                                          \
6846f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe   if (from < to) {                              \
6847f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe      if (to - from < Plen)                      \
6848f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe         consumed = to - from;                   \
6849f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe      else                                       \
6850f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe         consumed = ROUNDDN(Plen, round);        \
6851f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe   } else {                                      \
6852f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe      consumed = 0;                              \
6853f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe   }                                             \
6854f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe   Plen -= consumed; } while (0)
6855f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe
6856f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe   PlenCONSUME(BPC, BFC, 1,               BPClen);
6857f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe   PlenCONSUME(BFC, FSM, N_LINE_ARANGE,   BFClen);
6858f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe   PlenCONSUME(FSM, AFC, N_SECMAP_ARANGE, FSMlen);
6859f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe   PlenCONSUME(AFC, APC, N_LINE_ARANGE,   AFClen);
6860f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe   PlenCONSUME(APC, ARE, 1,               APClen);
6861f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe
6862f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe   if (0)
68635e5cb009574352880f1bc530e1a73ddaae5003fcflorian      VG_(printf) ("addr %p[%lu] ARE %p"
68645e5cb009574352880f1bc530e1a73ddaae5003fcflorian                   " BPC %p[%lu] BFC %p[%lu] FSM %p[%lu]"
68655e5cb009574352880f1bc530e1a73ddaae5003fcflorian                   " AFC %p[%lu] APC %p[%lu]\n",
6866f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe                   (void*)addr, len, (void*)ARE,
6867f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe                   (void*)BPC, BPClen, (void*)BFC, BFClen, (void*)FSM, FSMlen,
6868f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe                   (void*)AFC, AFClen, (void*)APC, APClen);
6869f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe
6870f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe   tl_assert (Plen == 0);
6871f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe
6872f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe   /* Set to NOACCESS pieces before and after not covered by entire SecMaps. */
6873f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe
6874f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe   /* First we set the partial cachelines. This is done through the cache. */
6875f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe   if (BPClen > 0)
6876f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe      zsm_sset_range_SMALL (BPC, BPClen, SVal_NOACCESS);
6877f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe   if (APClen > 0)
6878f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe      zsm_sset_range_SMALL (APC, APClen, SVal_NOACCESS);
6879f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe
6880f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe   /* After this, we will not use the cache anymore. We will directly work
6881f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe      in-place on the z shadow memory in SecMap(s).
6882f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe      So, we invalidate the cachelines for the whole range we are setting
6883f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe      to NOACCESS below. */
6884f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe   shmem__invalidate_scache_range (BFC, APC - BFC);
6885f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe
6886f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe   if (BFClen > 0)
6887f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe      zsm_sset_range_noaccess_in_secmap (BFC, BFClen);
6888f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe   if (AFClen > 0)
6889f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe      zsm_sset_range_noaccess_in_secmap (AFC, AFClen);
6890f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe
6891f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe   if (FSMlen > 0) {
6892f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe      /* Set to NOACCESS all the SecMaps, pushing the SecMaps to the
6893f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe         free list. */
6894f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe      Addr  sm_start = FSM;
6895f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe      while (sm_start < AFC) {
6896f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe         SecMap *sm = shmem__find_SecMap (sm_start);
6897f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe         if (sm) {
6898f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe            Addr gaKey;
6899f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe            SecMap *fm_sm;
6900f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe
6901f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe            if (CHECK_ZSM) tl_assert(is_sane_SecMap(sm));
6902f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe            for (UInt lz = 0; lz < N_SECMAP_ZLINES; lz++) {
690371ed3c9382fa816a84731e9a2ddb9eda7d5624a6philippe               LineZ *lineZ = &sm->linesZ[lz];
690471ed3c9382fa816a84731e9a2ddb9eda7d5624a6philippe               if (LIKELY(lineZ->dict[0] != SVal_INVALID))
690571ed3c9382fa816a84731e9a2ddb9eda7d5624a6philippe                  rcdec_LineZ(lineZ);
690671ed3c9382fa816a84731e9a2ddb9eda7d5624a6philippe               else
690771ed3c9382fa816a84731e9a2ddb9eda7d5624a6philippe                  clear_LineF_of_Z(lineZ);
6908f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe            }
6909f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe            if (!VG_(delFromFM)(map_shmem, &gaKey, (UWord*)&fm_sm, sm_start))
6910f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe               tl_assert (0);
6911f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe            stats__secmaps_in_map_shmem--;
6912f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe            tl_assert (gaKey == sm_start);
6913f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe            tl_assert (sm == fm_sm);
6914f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe            stats__secmaps_ssetGCed++;
6915f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe            push_SecMap_on_freelist (sm);
6916f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe         }
6917f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe         sm_start += N_SECMAP_ARANGE;
6918f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe      }
6919f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe      tl_assert (sm_start == AFC);
6920f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe
6921f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe      /* The above loop might have kept copies of freed SecMap in the smCache.
6922f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe         => clear them. */
6923f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe      if (address_in_range(smCache[0].gaKey, FSM, FSMlen)) {
6924f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe         smCache[0].gaKey = 1;
6925f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe         smCache[0].sm = NULL;
6926f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe      }
6927f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe      if (address_in_range(smCache[1].gaKey, FSM, FSMlen)) {
6928f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe         smCache[1].gaKey = 1;
6929f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe         smCache[1].sm = NULL;
6930f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe      }
6931f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe      if (address_in_range(smCache[2].gaKey, FSM, FSMlen)) {
6932f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe         smCache[2].gaKey = 1;
6933f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe         smCache[2].sm = NULL;
6934f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe      }
6935f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe      STATIC_ASSERT (3 == sizeof(smCache)/sizeof(SMCacheEnt));
6936f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe   }
6937f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe}
6938f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe
6939fd35d49d5799584a7fe41d2f535c1ba2dd5e17b4sewardjvoid libhb_srange_noaccess_AHAE ( Thr* thr, Addr a, SizeT szB )
6940fd35d49d5799584a7fe41d2f535c1ba2dd5e17b4sewardj{
6941fd35d49d5799584a7fe41d2f535c1ba2dd5e17b4sewardj   /* This really does put the requested range in NoAccess.  It's
6942fd35d49d5799584a7fe41d2f535c1ba2dd5e17b4sewardj      expensive though. */
6943fd35d49d5799584a7fe41d2f535c1ba2dd5e17b4sewardj   SVal sv = SVal_NOACCESS;
6944fd35d49d5799584a7fe41d2f535c1ba2dd5e17b4sewardj   tl_assert(is_sane_SVal_C(sv));
6945f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe   if (LIKELY(szB < 2 * N_LINE_ARANGE))
6946f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe      zsm_sset_range_SMALL (a, szB, SVal_NOACCESS);
6947f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe   else
6948f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe      zsm_sset_range_noaccess (a, szB);
6949fd35d49d5799584a7fe41d2f535c1ba2dd5e17b4sewardj   Filter__clear_range( thr->filter, a, szB );
6950fd35d49d5799584a7fe41d2f535c1ba2dd5e17b4sewardj}
6951fd35d49d5799584a7fe41d2f535c1ba2dd5e17b4sewardj
6952f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe/* Works byte at a time. Can be optimised if needed. */
6953f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippeUWord libhb_srange_get_abits (Addr a, UChar *abits, SizeT len)
6954f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe{
6955f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe   UWord anr = 0; // nr of bytes addressable.
6956f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe
6957f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe   /* Get the accessibility of each byte. Pay attention to not
6958f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe      create SecMap or LineZ when checking if a byte is addressable.
6959f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe
6960f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe      Note: this is used for client request. Performance deemed not critical.
6961f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe      So for simplicity, we work byte per byte.
6962f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe      Performance could be improved  by working with full cachelines
6963f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe      or with full SecMap, when reaching a cacheline or secmap boundary. */
6964f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe   for (SizeT i = 0; i < len; i++) {
6965f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe      SVal       sv = SVal_INVALID;
6966f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe      Addr       b = a + i;
6967f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe      Addr       tag = b & ~(N_LINE_ARANGE - 1);
6968f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe      UWord      wix = (b >> N_LINE_BITS) & (N_WAY_NENT - 1);
6969f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe      UWord      cloff = get_cacheline_offset(b);
6970f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe
6971f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe      /* Note: we do not use get_cacheline(b) to avoid creating cachelines
6972f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe         and/or SecMap for non addressable bytes. */
6973f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe      if (tag == cache_shmem.tags0[wix]) {
6974f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe         CacheLine copy = cache_shmem.lyns0[wix];
6975f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe         /* We work on a copy of the cacheline, as we do not want to
6976f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe            record the client request as a real read.
6977f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe            The below is somewhat similar to zsm_sapply08__msmcread but
6978f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe            avoids side effects on the cache. */
6979f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe         UWord toff = get_tree_offset(b); /* == 0 .. 7 */
6980f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe         UWord tno  = get_treeno(b);
6981f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe         UShort descr = copy.descrs[tno];
6982f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe         if (UNLIKELY( !(descr & (TREE_DESCR_8_0 << toff)) )) {
6983f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe            SVal* tree = &copy.svals[tno << 3];
6984f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe            copy.descrs[tno] = pulldown_to_8(tree, toff, descr);
6985f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe         }
6986f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe         sv = copy.svals[cloff];
6987f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe      } else {
6988f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe         /* Byte not found in the cacheline. Search for a SecMap. */
6989f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe         SecMap *sm = shmem__find_SecMap(b);
6990f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe         LineZ *lineZ;
6991f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe         if (sm == NULL)
6992f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe            sv = SVal_NOACCESS;
6993f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe         else {
6994f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe            UWord zix = shmem__get_SecMap_offset(b) >> N_LINE_BITS;
6995f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe            lineZ = &sm->linesZ[zix];
6996f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe            if (lineZ->dict[0] == SVal_INVALID) {
699771ed3c9382fa816a84731e9a2ddb9eda7d5624a6philippe               LineF *lineF = SVal2Ptr(lineZ->dict[1]);
699871ed3c9382fa816a84731e9a2ddb9eda7d5624a6philippe               sv = lineF->w64s[cloff];
6999f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe            } else {
7000f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe               UWord ix = read_twobit_array( lineZ->ix2s, cloff );
7001f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe               sv = lineZ->dict[ix];
7002f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe            }
7003f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe         }
7004f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe      }
7005f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe
7006f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe      tl_assert (sv != SVal_INVALID);
7007f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe      if (sv == SVal_NOACCESS) {
7008f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe         if (abits)
7009f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe            abits[i] = 0x00;
7010f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe      } else {
7011f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe         if (abits)
7012f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe            abits[i] = 0xff;
7013f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe         anr++;
7014f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe      }
7015f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe   }
7016f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe
7017f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe   return anr;
7018f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe}
7019f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe
7020f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe
7021406bac81a19666c213cd8b03457a105ea33a38easewardjvoid libhb_srange_untrack ( Thr* thr, Addr a, SizeT szB )
7022406bac81a19666c213cd8b03457a105ea33a38easewardj{
7023406bac81a19666c213cd8b03457a105ea33a38easewardj   SVal sv = SVal_NOACCESS;
7024406bac81a19666c213cd8b03457a105ea33a38easewardj   tl_assert(is_sane_SVal_C(sv));
7025406bac81a19666c213cd8b03457a105ea33a38easewardj   if (0 && TRACEME(a,szB)) trace(thr,a,szB,"untrack-before");
7026f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe   if (LIKELY(szB < 2 * N_LINE_ARANGE))
7027f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe      zsm_sset_range_SMALL (a, szB, SVal_NOACCESS);
7028f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe   else
7029f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe      zsm_sset_range_noaccess (a, szB);
7030406bac81a19666c213cd8b03457a105ea33a38easewardj   Filter__clear_range( thr->filter, a, szB );
7031406bac81a19666c213cd8b03457a105ea33a38easewardj   if (0 && TRACEME(a,szB)) trace(thr,a,szB,"untrack-after ");
7032406bac81a19666c213cd8b03457a105ea33a38easewardj}
7033406bac81a19666c213cd8b03457a105ea33a38easewardj
70340b20a15992b304fa652bb84ee4c3b63ff1eeef84sewardjThread* libhb_get_Thr_hgthread ( Thr* thr ) {
7035f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   tl_assert(thr);
703660626649e5fa6cd21af377fde5e83803fc136f61sewardj   return thr->hgthread;
7037f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj}
7038f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
70390b20a15992b304fa652bb84ee4c3b63ff1eeef84sewardjvoid libhb_set_Thr_hgthread ( Thr* thr, Thread* hgthread ) {
7040f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj   tl_assert(thr);
70410b20a15992b304fa652bb84ee4c3b63ff1eeef84sewardj   thr->hgthread = hgthread;
7042f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj}
7043f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
704423f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardjvoid libhb_copy_shadow_state ( Thr* thr, Addr src, Addr dst, SizeT len )
7045f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj{
704623f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj   zsm_scopy_range(src, dst, len);
704723f1200ba3aa3d8dbb484626ba1bdb7cfcf3b3a9sewardj   Filter__clear_range( thr->filter, dst, len );
7048f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj}
7049f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
7050f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardjvoid libhb_maybe_GC ( void )
7051f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj{
7052cabdbb5cab3740c7082e44b770a582c8186888e9philippe   /* GC the unreferenced (zero rc) RCECs when
7053e0829e01793448f888699d0ff928dac93c395c36philippe         (1) reaching a significant nr of RCECs (to avoid scanning a contextTab
7054e0829e01793448f888699d0ff928dac93c395c36philippe             with mostly NULL ptr)
7055e0829e01793448f888699d0ff928dac93c395c36philippe     and (2) approaching the max nr of RCEC (as we have in any case
7056e0829e01793448f888699d0ff928dac93c395c36philippe             at least that amount of RCEC in the pool allocator)
7057e0829e01793448f888699d0ff928dac93c395c36philippe             Note: the margin allows to avoid a small but constant increase
7058e0829e01793448f888699d0ff928dac93c395c36philippe             of the max nr of RCEC due to the fact that libhb_maybe_GC is
7059e0829e01793448f888699d0ff928dac93c395c36philippe             not called when the current nr of RCEC exactly reaches the max.
7060e0829e01793448f888699d0ff928dac93c395c36philippe     and (3) the nr of referenced RCECs is less than 75% than total nr RCECs.
7061e0829e01793448f888699d0ff928dac93c395c36philippe     Avoid growing too much the nr of RCEC keeps the memory use low,
7062e0829e01793448f888699d0ff928dac93c395c36philippe     and avoids to have too many elements in the (fixed) contextTab hashtable.
7063e0829e01793448f888699d0ff928dac93c395c36philippe   */
7064cabdbb5cab3740c7082e44b770a582c8186888e9philippe   if (UNLIKELY(stats__ctxt_tab_curr > N_RCEC_TAB/2
7065e0829e01793448f888699d0ff928dac93c395c36philippe                && stats__ctxt_tab_curr + 1000 >= stats__ctxt_tab_max
7066f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe                && (stats__ctxt_tab_curr * 3)/4 > RCEC_referenced))
7067cabdbb5cab3740c7082e44b770a582c8186888e9philippe      do_RCEC_GC();
7068158404e838998fbdb63ed79b175bc45e91e21998philippe
7069f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe   /* If there are still no entries available (all the table entries are full),
7070ad4e979f408239dabbaae955d8ffcb84a51a5c85florian      and we hit the threshold point, then do a GC */
7071f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe   Bool vts_tab_GC = vts_tab_freelist == VtsID_INVALID
7072f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe      && VG_(sizeXA)( vts_tab ) >= vts_next_GC_at;
7073f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe   if (UNLIKELY (vts_tab_GC))
7074f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe      vts_tab__do_GC( False/*don't show stats*/ );
7075f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe
7076f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe   /* scan GC the SecMaps when
7077f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe          (1) no SecMap in the freelist
7078f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe      and (2) the current nr of live secmaps exceeds the threshold. */
7079f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe   if (UNLIKELY(SecMap_freelist == NULL
7080f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe                && stats__secmaps_in_map_shmem >= next_SecMap_GC_at)) {
7081f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe      // If we did a vts tab GC, then no need to flush the cache again.
7082f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe      if (!vts_tab_GC)
7083f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe         zsm_flush_cache();
7084f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe      shmem__SecMap_do_GC(True);
7085f54cb66eba8137cac5b3ee8be29bb24c1ddfb535philippe   }
7086cabdbb5cab3740c7082e44b770a582c8186888e9philippe
7087cabdbb5cab3740c7082e44b770a582c8186888e9philippe   /* Check the reference counts (expensive) */
7088cabdbb5cab3740c7082e44b770a582c8186888e9philippe   if (CHECK_CEM)
7089cabdbb5cab3740c7082e44b770a582c8186888e9philippe      event_map__check_reference_counts();
7090f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj}
7091f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
7092f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
7093f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj/////////////////////////////////////////////////////////////////
7094f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj/////////////////////////////////////////////////////////////////
7095f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj//                                                             //
7096f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj// SECTION END main library                                    //
7097f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj//                                                             //
7098f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj/////////////////////////////////////////////////////////////////
7099f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj/////////////////////////////////////////////////////////////////
7100f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj
7101f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj/*--------------------------------------------------------------------*/
7102f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj/*--- end                                             libhb_main.c ---*/
7103f98e1c03ce4bea1fb092cdea5571c41f29f6df9bsewardj/*--------------------------------------------------------------------*/
7104