hg_wordset.c revision 9bea4c13fca0e3bb4b719dcb3ed63d47d479294e
1
2/*--------------------------------------------------------------------*/
3/*--- Sets of words, with unique set identifiers.                  ---*/
4/*---                                                 hg_wordset.c ---*/
5/*--------------------------------------------------------------------*/
6
7/*
8   This file is part of Helgrind, a Valgrind tool for detecting errors
9   in threaded programs.
10
11   Copyright (C) 2007-2010 OpenWorks LLP
12       info@open-works.co.uk
13
14   This program is free software; you can redistribute it and/or
15   modify it under the terms of the GNU General Public License as
16   published by the Free Software Foundation; either version 2 of the
17   License, or (at your option) any later version.
18
19   This program is distributed in the hope that it will be useful, but
20   WITHOUT ANY WARRANTY; without even the implied warranty of
21   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
22   General Public License for more details.
23
24   You should have received a copy of the GNU General Public License
25   along with this program; if not, write to the Free Software
26   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
27   02111-1307, USA.
28
29   The GNU General Public License is contained in the file COPYING.
30
31   Neither the names of the U.S. Department of Energy nor the
32   University of California nor the names of its contributors may be
33   used to endorse or promote products derived from this software
34   without prior written permission.
35*/
36
37#include "pub_tool_basics.h"
38#include "pub_tool_libcassert.h"
39#include "pub_tool_libcbase.h"
40#include "pub_tool_libcprint.h"
41#include "pub_tool_threadstate.h"
42#include "pub_tool_wordfm.h"
43
44#include "hg_basics.h"
45#include "hg_wordset.h"     /* self */
46
47//------------------------------------------------------------------//
48//--- Word Cache                                                 ---//
49//------------------------------------------------------------------//
50
51typedef
52   struct { UWord arg1; UWord arg2; UWord res; }
53   WCacheEnt;
54
55/* Each cache is a fixed sized array of N_WCACHE_STAT_MAX entries.
56   However only the first .dynMax are used.  This is because at some
57   point, expanding the cache further overall gives a slowdown because
58   searching more entries more than negates any performance advantage
59   from caching those entries in the first place.  Hence use .dynMax
60   to allow the size of the cache(s) to be set differently for each
61   different WordSetU. */
62#define N_WCACHE_STAT_MAX 32
63typedef
64   struct {
65      WCacheEnt ent[N_WCACHE_STAT_MAX];
66      UWord     dynMax; /* 1 .. N_WCACHE_STAT_MAX inclusive */
67      UWord     inUse;  /* 0 .. dynMax inclusive */
68   }
69   WCache;
70
71#define WCache_INIT(_zzcache,_zzdynmax)                              \
72   do {                                                              \
73      tl_assert((_zzdynmax) >= 1);                                   \
74      tl_assert((_zzdynmax) <= N_WCACHE_STAT_MAX);                   \
75      (_zzcache).dynMax = (_zzdynmax);                               \
76      (_zzcache).inUse = 0;                                          \
77   } while (0)
78
79#define WCache_LOOKUP_AND_RETURN(_retty,_zzcache,_zzarg1,_zzarg2)    \
80   do {                                                              \
81      UWord   _i;                                                    \
82      UWord   _arg1  = (UWord)(_zzarg1);                             \
83      UWord   _arg2  = (UWord)(_zzarg2);                             \
84      WCache* _cache = &(_zzcache);                                  \
85      tl_assert(_cache->dynMax >= 1);                                \
86      tl_assert(_cache->dynMax <= N_WCACHE_STAT_MAX);                \
87      tl_assert(_cache->inUse >= 0);                                 \
88      tl_assert(_cache->inUse <= _cache->dynMax);                    \
89      if (_cache->inUse > 0) {                                       \
90         if (_cache->ent[0].arg1 == _arg1                            \
91             && _cache->ent[0].arg2 == _arg2)                        \
92            return (_retty)_cache->ent[0].res;                       \
93         for (_i = 1; _i < _cache->inUse; _i++) {                    \
94            if (_cache->ent[_i].arg1 == _arg1                        \
95                && _cache->ent[_i].arg2 == _arg2) {                  \
96               WCacheEnt tmp     = _cache->ent[_i-1];                \
97               _cache->ent[_i-1] = _cache->ent[_i];                  \
98               _cache->ent[_i]   = tmp;                              \
99               return (_retty)_cache->ent[_i-1].res;                 \
100            }                                                        \
101         }                                                           \
102      }                                                              \
103   } while (0)
104
105#define WCache_UPDATE(_zzcache,_zzarg1,_zzarg2,_zzresult)            \
106   do {                                                              \
107      Word    _i;                                                    \
108      UWord   _arg1  = (UWord)(_zzarg1);                             \
109      UWord   _arg2  = (UWord)(_zzarg2);                             \
110      UWord   _res   = (UWord)(_zzresult);                           \
111      WCache* _cache = &(_zzcache);                                  \
112      tl_assert(_cache->dynMax >= 1);                                \
113      tl_assert(_cache->dynMax <= N_WCACHE_STAT_MAX);                \
114      tl_assert(_cache->inUse >= 0);                                 \
115      tl_assert(_cache->inUse <= _cache->dynMax);                    \
116      if (_cache->inUse < _cache->dynMax)                            \
117         _cache->inUse++;                                            \
118      for (_i = _cache->inUse-1; _i >= 1; _i--)                      \
119         _cache->ent[_i] = _cache->ent[_i-1];                        \
120      _cache->ent[0].arg1 = _arg1;                                   \
121      _cache->ent[0].arg2 = _arg2;                                   \
122      _cache->ent[0].res  = _res;                                    \
123   } while (0)
124
125
126//------------------------------------------------------------------//
127//---                          WordSet                           ---//
128//---                       Implementation                       ---//
129//------------------------------------------------------------------//
130
131typedef
132   struct {
133      WordSetU* owner; /* for sanity checking */
134      UWord*    words;
135      UWord     size; /* Really this should be SizeT */
136   }
137   WordVec;
138
139/* ix2vec[0 .. ix2vec_used-1] are pointers to the lock sets (WordVecs)
140   really.  vec2ix is the inverse mapping, mapping WordVec* to the
141   corresponding ix2vec entry number.  The two mappings are mutually
142   redundant. */
143struct _WordSetU {
144      void*     (*alloc)(HChar*,SizeT);
145      HChar*    cc;
146      void      (*dealloc)(void*);
147      WordFM*   vec2ix; /* WordVec-to-WordSet mapping tree */
148      WordVec** ix2vec; /* WordSet-to-WordVec mapping array */
149      UWord     ix2vec_size;
150      UWord     ix2vec_used;
151      WordSet   empty; /* cached, for speed */
152      /* Caches for some operations */
153      WCache    cache_addTo;
154      WCache    cache_delFrom;
155      WCache    cache_intersect;
156      WCache    cache_minus;
157      /* Stats */
158      UWord     n_add;
159      UWord     n_add_uncached;
160      UWord     n_del;
161      UWord     n_del_uncached;
162      UWord     n_union;
163      UWord     n_intersect;
164      UWord     n_intersect_uncached;
165      UWord     n_minus;
166      UWord     n_minus_uncached;
167      UWord     n_elem;
168      UWord     n_doubleton;
169      UWord     n_isEmpty;
170      UWord     n_isSingleton;
171      UWord     n_anyElementOf;
172      UWord     n_isSubsetOf;
173   };
174
175/* Create a new WordVec of the given size. */
176
177static WordVec* new_WV_of_size ( WordSetU* wsu, UWord sz )
178{
179   WordVec* wv;
180   tl_assert(sz >= 0);
181   wv = wsu->alloc( wsu->cc, sizeof(WordVec) );
182   wv->owner = wsu;
183   wv->words = NULL;
184   wv->size = sz;
185   if (sz > 0) {
186     wv->words = wsu->alloc( wsu->cc, (SizeT)sz * sizeof(UWord) );
187   }
188   return wv;
189}
190
191static void delete_WV ( WordVec* wv )
192{
193   void (*dealloc)(void*) = wv->owner->dealloc;
194   if (wv->words) {
195      dealloc(wv->words);
196   }
197   dealloc(wv);
198}
199static void delete_WV_for_FM ( UWord wv ) {
200   delete_WV( (WordVec*)wv );
201}
202
203static Word cmp_WordVecs_for_FM ( UWord wv1W, UWord wv2W )
204{
205   UWord    i;
206   WordVec* wv1    = (WordVec*)wv1W;
207   WordVec* wv2    = (WordVec*)wv2W;
208   UWord    common = wv1->size < wv2->size ? wv1->size : wv2->size;
209   for (i = 0; i < common; i++) {
210      if (wv1->words[i] == wv2->words[i])
211         continue;
212      if (wv1->words[i] < wv2->words[i])
213         return -1;
214      if (wv1->words[i] > wv2->words[i])
215         return 1;
216      tl_assert(0);
217   }
218   /* Ok, the common sections are identical.  So now consider the
219      tails.  Both sets are considered to finish in an implied
220      sequence of -infinity. */
221   if (wv1->size < wv2->size) {
222      tl_assert(common == wv1->size);
223      return -1; /* impliedly, wv1 contains some -infinitys in places
224                    where wv2 doesn't. */
225   }
226   if (wv1->size > wv2->size) {
227      tl_assert(common == wv2->size);
228      return 1;
229   }
230   tl_assert(common == wv1->size);
231   return 0; /* identical */
232}
233
234static void ensure_ix2vec_space ( WordSetU* wsu )
235{
236   UInt      i, new_sz;
237   WordVec** new_vec;
238   tl_assert(wsu->ix2vec_used <= wsu->ix2vec_size);
239   if (wsu->ix2vec_used < wsu->ix2vec_size)
240      return;
241   new_sz = 2 * wsu->ix2vec_size;
242   if (new_sz == 0) new_sz = 2;
243   new_vec = wsu->alloc( wsu->cc, new_sz * sizeof(WordVec*) );
244   tl_assert(new_vec);
245   for (i = 0; i < wsu->ix2vec_size; i++)
246      new_vec[i] = wsu->ix2vec[i];
247   if (wsu->ix2vec)
248      wsu->dealloc(wsu->ix2vec);
249   wsu->ix2vec = new_vec;
250   wsu->ix2vec_size = new_sz;
251}
252
253/* Index into a WordSetU, doing the obvious range check.  Failure of
254   the assertions marked XXX and YYY is an indication of passing the
255   wrong WordSetU* in the public API of this module. */
256static WordVec* do_ix2vec ( WordSetU* wsu, WordSet ws )
257{
258   WordVec* wv;
259   tl_assert(wsu->ix2vec_used <= wsu->ix2vec_size);
260   if (wsu->ix2vec_used > 0)
261      tl_assert(wsu->ix2vec);
262   /* If this assertion fails, it may mean you supplied a 'ws'
263      that does not come from the 'wsu' universe. */
264   tl_assert(ws < wsu->ix2vec_used); /* XXX */
265   wv = wsu->ix2vec[ws];
266   /* Make absolutely sure that 'ws' is a member of 'wsu'. */
267   tl_assert(wv);
268   tl_assert(wv->owner == wsu); /* YYY */
269   return wv;
270}
271
272/* See if wv is contained within wsu.  If so, deallocate wv and return
273   the index of the already-present copy.  If not, add wv to both the
274   vec2ix and ix2vec mappings and return its index.
275*/
276static WordSet add_or_dealloc_WordVec( WordSetU* wsu, WordVec* wv_new )
277{
278   Bool     have;
279   WordVec* wv_old;
280   UWord/*Set*/ ix_old = -1;
281   /* Really WordSet, but need something that can safely be casted to
282      a Word* in the lookupFM.  Making it WordSet (which is 32 bits)
283      causes failures on a 64-bit platform. */
284   tl_assert(wv_new->owner == wsu);
285   have = VG_(lookupFM)( wsu->vec2ix,
286                         (Word*)&wv_old, (Word*)&ix_old,
287                         (Word)wv_new );
288   if (have) {
289      tl_assert(wv_old != wv_new);
290      tl_assert(wv_old);
291      tl_assert(wv_old->owner == wsu);
292      tl_assert(ix_old < wsu->ix2vec_used);
293      tl_assert(wsu->ix2vec[ix_old] == wv_old);
294      delete_WV( wv_new );
295      return (WordSet)ix_old;
296   } else {
297      ensure_ix2vec_space( wsu );
298      tl_assert(wsu->ix2vec);
299      tl_assert(wsu->ix2vec_used < wsu->ix2vec_size);
300      wsu->ix2vec[wsu->ix2vec_used] = wv_new;
301      VG_(addToFM)( wsu->vec2ix, (Word)wv_new, (Word)wsu->ix2vec_used );
302      if (0) VG_(printf)("aodW %d\n", (Int)wsu->ix2vec_used );
303      wsu->ix2vec_used++;
304      tl_assert(wsu->ix2vec_used <= wsu->ix2vec_size);
305      return (WordSet)(wsu->ix2vec_used - 1);
306   }
307}
308
309
310WordSetU* HG_(newWordSetU) ( void* (*alloc_nofail)( HChar*, SizeT ),
311                             HChar* cc,
312                             void  (*dealloc)(void*),
313                             Word  cacheSize )
314{
315   WordSetU* wsu;
316   WordVec*  empty;
317
318   wsu          = alloc_nofail( cc, sizeof(WordSetU) );
319   VG_(memset)( wsu, 0, sizeof(WordSetU) );
320   wsu->alloc   = alloc_nofail;
321   wsu->cc      = cc;
322   wsu->dealloc = dealloc;
323   wsu->vec2ix  = VG_(newFM)( alloc_nofail, cc,
324                              dealloc, cmp_WordVecs_for_FM );
325   wsu->ix2vec_used = 0;
326   wsu->ix2vec_size = 0;
327   wsu->ix2vec      = NULL;
328   WCache_INIT(wsu->cache_addTo,     cacheSize);
329   WCache_INIT(wsu->cache_delFrom,   cacheSize);
330   WCache_INIT(wsu->cache_intersect, cacheSize);
331   WCache_INIT(wsu->cache_minus,     cacheSize);
332   empty = new_WV_of_size( wsu, 0 );
333   wsu->empty = add_or_dealloc_WordVec( wsu, empty );
334
335   return wsu;
336}
337
338void HG_(deleteWordSetU) ( WordSetU* wsu )
339{
340   void (*dealloc)(void*) = wsu->dealloc;
341   tl_assert(wsu->vec2ix);
342   VG_(deleteFM)( wsu->vec2ix, delete_WV_for_FM, NULL/*val-finalizer*/ );
343   if (wsu->ix2vec)
344      dealloc(wsu->ix2vec);
345   dealloc(wsu);
346}
347
348WordSet HG_(emptyWS) ( WordSetU* wsu )
349{
350   return wsu->empty;
351}
352
353Bool HG_(isEmptyWS) ( WordSetU* wsu, WordSet ws )
354{
355   WordVec* wv = do_ix2vec( wsu, ws );
356   wsu->n_isEmpty++;
357   if (wv->size == 0) {
358      tl_assert(ws == wsu->empty);
359      return True;
360   } else {
361      tl_assert(ws != wsu->empty);
362      return False;
363   }
364}
365
366Bool HG_(isSingletonWS) ( WordSetU* wsu, WordSet ws, UWord w )
367{
368   WordVec* wv;
369   tl_assert(wsu);
370   wsu->n_isSingleton++;
371   wv = do_ix2vec( wsu, ws );
372   return (Bool)(wv->size == 1 && wv->words[0] == w);
373}
374
375UWord HG_(cardinalityWS) ( WordSetU* wsu, WordSet ws )
376{
377   WordVec* wv;
378   tl_assert(wsu);
379   wv = do_ix2vec( wsu, ws );
380   tl_assert(wv->size >= 0);
381   return wv->size;
382}
383
384UWord HG_(anyElementOfWS) ( WordSetU* wsu, WordSet ws )
385{
386   WordVec* wv;
387   tl_assert(wsu);
388   wsu->n_anyElementOf++;
389   wv = do_ix2vec( wsu, ws );
390   tl_assert(wv->size >= 1);
391   return wv->words[0];
392}
393
394UWord HG_(cardinalityWSU) ( WordSetU* wsu )
395{
396   tl_assert(wsu);
397   return wsu->ix2vec_used;
398}
399
400void HG_(getPayloadWS) ( /*OUT*/UWord** words, /*OUT*/UWord* nWords,
401                         WordSetU* wsu, WordSet ws )
402{
403   WordVec* wv;
404   tl_assert(wsu);
405   wv = do_ix2vec( wsu, ws );
406   tl_assert(wv->size >= 0);
407   *nWords = wv->size;
408   *words  = wv->words;
409}
410
411Bool HG_(plausibleWS) ( WordSetU* wsu, WordSet ws )
412{
413   if (wsu == NULL) return False;
414   if (ws < 0 || ws >= wsu->ix2vec_used)
415      return False;
416   return True;
417}
418
419Bool HG_(saneWS_SLOW) ( WordSetU* wsu, WordSet ws )
420{
421   WordVec* wv;
422   UWord    i;
423   if (wsu == NULL) return False;
424   if (ws < 0 || ws >= wsu->ix2vec_used)
425      return False;
426   wv = do_ix2vec( wsu, ws );
427   /* can never happen .. do_ix2vec will assert instead.  Oh well. */
428   if (wv->owner != wsu) return False;
429   if (wv->size < 0) return False;
430   if (wv->size > 0) {
431      for (i = 0; i < wv->size-1; i++) {
432         if (wv->words[i] >= wv->words[i+1])
433            return False;
434      }
435   }
436   return True;
437}
438
439Bool HG_(elemWS) ( WordSetU* wsu, WordSet ws, UWord w )
440{
441   UWord    i;
442   WordVec* wv = do_ix2vec( wsu, ws );
443   wsu->n_elem++;
444   for (i = 0; i < wv->size; i++) {
445      if (wv->words[i] == w)
446         return True;
447   }
448   return False;
449}
450
451WordSet HG_(doubletonWS) ( WordSetU* wsu, UWord w1, UWord w2 )
452{
453   WordVec* wv;
454   wsu->n_doubleton++;
455   if (w1 == w2) {
456      wv = new_WV_of_size(wsu, 1);
457      wv->words[0] = w1;
458   }
459   else if (w1 < w2) {
460      wv = new_WV_of_size(wsu, 2);
461      wv->words[0] = w1;
462      wv->words[1] = w2;
463   }
464   else {
465      tl_assert(w1 > w2);
466      wv = new_WV_of_size(wsu, 2);
467      wv->words[0] = w2;
468      wv->words[1] = w1;
469   }
470   return add_or_dealloc_WordVec( wsu, wv );
471}
472
473WordSet HG_(singletonWS) ( WordSetU* wsu, UWord w )
474{
475   return HG_(doubletonWS)( wsu, w, w );
476}
477
478WordSet HG_(isSubsetOf) ( WordSetU* wsu, WordSet small, WordSet big )
479{
480   wsu->n_isSubsetOf++;
481   return small == HG_(intersectWS)( wsu, small, big );
482}
483
484void HG_(ppWS) ( WordSetU* wsu, WordSet ws )
485{
486   UWord    i;
487   WordVec* wv;
488   tl_assert(wsu);
489   wv = do_ix2vec( wsu, ws );
490   VG_(printf)("{");
491   for (i = 0; i < wv->size; i++) {
492      VG_(printf)("%p", (void*)wv->words[i]);
493      if (i < wv->size-1)
494         VG_(printf)(",");
495   }
496   VG_(printf)("}");
497}
498
499void HG_(ppWSUstats) ( WordSetU* wsu, HChar* name )
500{
501   VG_(printf)("   WordSet \"%s\":\n", name);
502   VG_(printf)("      addTo        %10lu (%lu uncached)\n",
503               wsu->n_add, wsu->n_add_uncached);
504   VG_(printf)("      delFrom      %10lu (%lu uncached)\n",
505               wsu->n_del, wsu->n_del_uncached);
506   VG_(printf)("      union        %10lu\n", wsu->n_union);
507   VG_(printf)("      intersect    %10lu (%lu uncached) "
508               "[nb. incl isSubsetOf]\n",
509               wsu->n_intersect, wsu->n_intersect_uncached);
510   VG_(printf)("      minus        %10lu (%lu uncached)\n",
511               wsu->n_minus, wsu->n_minus_uncached);
512   VG_(printf)("      elem         %10lu\n",   wsu->n_elem);
513   VG_(printf)("      doubleton    %10lu\n",   wsu->n_doubleton);
514   VG_(printf)("      isEmpty      %10lu\n",   wsu->n_isEmpty);
515   VG_(printf)("      isSingleton  %10lu\n",   wsu->n_isSingleton);
516   VG_(printf)("      anyElementOf %10lu\n",   wsu->n_anyElementOf);
517   VG_(printf)("      isSubsetOf   %10lu\n",   wsu->n_isSubsetOf);
518}
519
520WordSet HG_(addToWS) ( WordSetU* wsu, WordSet ws, UWord w )
521{
522   UWord    k, j;
523   WordVec* wv_new;
524   WordVec* wv;
525   WordSet  result = (WordSet)(-1); /* bogus */
526
527   wsu->n_add++;
528   WCache_LOOKUP_AND_RETURN(WordSet, wsu->cache_addTo, ws, w);
529   wsu->n_add_uncached++;
530
531   /* If already present, this is a no-op. */
532   wv = do_ix2vec( wsu, ws );
533   for (k = 0; k < wv->size; k++) {
534      if (wv->words[k] == w) {
535         result = ws;
536         goto out;
537      }
538   }
539   /* Ok, not present.  Build a new one ... */
540   wv_new = new_WV_of_size( wsu, wv->size + 1 );
541   k = j = 0;
542   for (; k < wv->size && wv->words[k] < w; k++) {
543      wv_new->words[j++] = wv->words[k];
544   }
545   wv_new->words[j++] = w;
546   for (; k < wv->size; k++) {
547      tl_assert(wv->words[k] > w);
548      wv_new->words[j++] = wv->words[k];
549   }
550   tl_assert(j == wv_new->size);
551
552   /* Find any existing copy, or add the new one. */
553   result = add_or_dealloc_WordVec( wsu, wv_new );
554   tl_assert(result != (WordSet)(-1));
555
556  out:
557   WCache_UPDATE(wsu->cache_addTo, ws, w, result);
558   return result;
559}
560
561WordSet HG_(delFromWS) ( WordSetU* wsu, WordSet ws, UWord w )
562{
563   UWord    i, j, k;
564   WordVec* wv_new;
565   WordSet  result = (WordSet)(-1); /* bogus */
566   WordVec* wv = do_ix2vec( wsu, ws );
567
568   wsu->n_del++;
569
570   /* special case empty set */
571   if (wv->size == 0) {
572      tl_assert(ws == wsu->empty);
573      return ws;
574   }
575
576   WCache_LOOKUP_AND_RETURN(WordSet, wsu->cache_delFrom, ws, w);
577   wsu->n_del_uncached++;
578
579   /* If not already present, this is a no-op. */
580   for (i = 0; i < wv->size; i++) {
581      if (wv->words[i] == w)
582         break;
583   }
584   if (i == wv->size) {
585      result = ws;
586      goto out;
587   }
588   /* So w is present in ws, and the new set will be one element
589      smaller. */
590   tl_assert(i >= 0 && i < wv->size);
591   tl_assert(wv->size > 0);
592
593   wv_new = new_WV_of_size( wsu, wv->size - 1 );
594   j = k = 0;
595   for (; j < wv->size; j++) {
596      if (j == i)
597         continue;
598      wv_new->words[k++] = wv->words[j];
599   }
600   tl_assert(k == wv_new->size);
601
602   result = add_or_dealloc_WordVec( wsu, wv_new );
603   if (wv->size == 1) {
604      tl_assert(result == wsu->empty);
605   }
606
607  out:
608   WCache_UPDATE(wsu->cache_delFrom, ws, w, result);
609   return result;
610}
611
612WordSet HG_(unionWS) ( WordSetU* wsu, WordSet ws1, WordSet ws2 )
613{
614   UWord    i1, i2, k, sz;
615   WordVec* wv_new;
616   WordVec* wv1 = do_ix2vec( wsu, ws1 );
617   WordVec* wv2 = do_ix2vec( wsu, ws2 );
618   wsu->n_union++;
619   sz = 0;
620   i1 = i2 = 0;
621   while (1) {
622      if (i1 >= wv1->size || i2 >= wv2->size)
623         break;
624      sz++;
625      if (wv1->words[i1] < wv2->words[i2]) {
626         i1++;
627      } else
628      if (wv1->words[i1] > wv2->words[i2]) {
629         i2++;
630      } else {
631         i1++;
632         i2++;
633      }
634   }
635   tl_assert(i1 <= wv1->size);
636   tl_assert(i2 <= wv2->size);
637   tl_assert(i1 == wv1->size || i2 == wv2->size);
638   if (i1 == wv1->size && i2 < wv2->size) {
639      sz += (wv2->size - i2);
640   }
641   if (i2 == wv2->size && i1 < wv1->size) {
642      sz += (wv1->size - i1);
643   }
644
645   wv_new = new_WV_of_size( wsu, sz );
646   k = 0;
647
648   i1 = i2 = 0;
649   while (1) {
650      if (i1 >= wv1->size || i2 >= wv2->size)
651         break;
652      if (wv1->words[i1] < wv2->words[i2]) {
653         wv_new->words[k++] = wv1->words[i1];
654         i1++;
655      } else
656      if (wv1->words[i1] > wv2->words[i2]) {
657         wv_new->words[k++] = wv2->words[i2];
658         i2++;
659      } else {
660         wv_new->words[k++] = wv1->words[i1];
661         i1++;
662         i2++;
663      }
664   }
665   tl_assert(i1 <= wv1->size);
666   tl_assert(i2 <= wv2->size);
667   tl_assert(i1 == wv1->size || i2 == wv2->size);
668   if (i1 == wv1->size && i2 < wv2->size) {
669      while (i2 < wv2->size)
670         wv_new->words[k++] = wv2->words[i2++];
671   }
672   if (i2 == wv2->size && i1 < wv1->size) {
673      while (i1 < wv1->size)
674         wv_new->words[k++] = wv1->words[i1++];
675   }
676
677   tl_assert(k == sz);
678
679   return add_or_dealloc_WordVec( wsu, wv_new );
680}
681
682WordSet HG_(intersectWS) ( WordSetU* wsu, WordSet ws1, WordSet ws2 )
683{
684   UWord    i1, i2, k, sz;
685   WordSet  ws_new = (WordSet)(-1); /* bogus */
686   WordVec* wv_new;
687   WordVec* wv1;
688   WordVec* wv2;
689
690   wsu->n_intersect++;
691
692   /* Deal with an obvious case fast. */
693   if (ws1 == ws2)
694      return ws1;
695
696   /* Since intersect(x,y) == intersect(y,x), convert both variants to
697      the same query.  This reduces the number of variants the cache
698      has to deal with. */
699   if (ws1 > ws2) {
700      WordSet wst = ws1; ws1 = ws2; ws2 = wst;
701   }
702
703   WCache_LOOKUP_AND_RETURN(WordSet, wsu->cache_intersect, ws1, ws2);
704   wsu->n_intersect_uncached++;
705
706   wv1 = do_ix2vec( wsu, ws1 );
707   wv2 = do_ix2vec( wsu, ws2 );
708   sz = 0;
709   i1 = i2 = 0;
710   while (1) {
711      if (i1 >= wv1->size || i2 >= wv2->size)
712         break;
713      if (wv1->words[i1] < wv2->words[i2]) {
714         i1++;
715      } else
716      if (wv1->words[i1] > wv2->words[i2]) {
717         i2++;
718      } else {
719         sz++;
720         i1++;
721         i2++;
722      }
723   }
724   tl_assert(i1 <= wv1->size);
725   tl_assert(i2 <= wv2->size);
726   tl_assert(i1 == wv1->size || i2 == wv2->size);
727
728   wv_new = new_WV_of_size( wsu, sz );
729   k = 0;
730
731   i1 = i2 = 0;
732   while (1) {
733      if (i1 >= wv1->size || i2 >= wv2->size)
734         break;
735      if (wv1->words[i1] < wv2->words[i2]) {
736         i1++;
737      } else
738      if (wv1->words[i1] > wv2->words[i2]) {
739         i2++;
740      } else {
741         wv_new->words[k++] = wv1->words[i1];
742         i1++;
743         i2++;
744      }
745   }
746   tl_assert(i1 <= wv1->size);
747   tl_assert(i2 <= wv2->size);
748   tl_assert(i1 == wv1->size || i2 == wv2->size);
749
750   tl_assert(k == sz);
751
752   ws_new = add_or_dealloc_WordVec( wsu, wv_new );
753   if (sz == 0) {
754      tl_assert(ws_new == wsu->empty);
755   }
756
757   tl_assert(ws_new != (WordSet)(-1));
758   WCache_UPDATE(wsu->cache_intersect, ws1, ws2, ws_new);
759
760   return ws_new;
761}
762
763WordSet HG_(minusWS) ( WordSetU* wsu, WordSet ws1, WordSet ws2 )
764{
765   UWord    i1, i2, k, sz;
766   WordSet  ws_new = (WordSet)(-1); /* bogus */
767   WordVec* wv_new;
768   WordVec* wv1;
769   WordVec* wv2;
770
771   wsu->n_minus++;
772   WCache_LOOKUP_AND_RETURN(WordSet, wsu->cache_minus, ws1, ws2);
773   wsu->n_minus_uncached++;
774
775   wv1 = do_ix2vec( wsu, ws1 );
776   wv2 = do_ix2vec( wsu, ws2 );
777   sz = 0;
778   i1 = i2 = 0;
779   while (1) {
780      if (i1 >= wv1->size || i2 >= wv2->size)
781         break;
782      if (wv1->words[i1] < wv2->words[i2]) {
783         sz++;
784         i1++;
785      } else
786      if (wv1->words[i1] > wv2->words[i2]) {
787         i2++;
788      } else {
789         i1++;
790         i2++;
791      }
792   }
793   tl_assert(i1 <= wv1->size);
794   tl_assert(i2 <= wv2->size);
795   tl_assert(i1 == wv1->size || i2 == wv2->size);
796   if (i2 == wv2->size && i1 < wv1->size) {
797      sz += (wv1->size - i1);
798   }
799
800   wv_new = new_WV_of_size( wsu, sz );
801   k = 0;
802
803   i1 = i2 = 0;
804   while (1) {
805      if (i1 >= wv1->size || i2 >= wv2->size)
806         break;
807      if (wv1->words[i1] < wv2->words[i2]) {
808         wv_new->words[k++] = wv1->words[i1];
809         i1++;
810      } else
811      if (wv1->words[i1] > wv2->words[i2]) {
812         i2++;
813      } else {
814         i1++;
815         i2++;
816      }
817   }
818   tl_assert(i1 <= wv1->size);
819   tl_assert(i2 <= wv2->size);
820   tl_assert(i1 == wv1->size || i2 == wv2->size);
821   if (i2 == wv2->size && i1 < wv1->size) {
822      while (i1 < wv1->size)
823         wv_new->words[k++] = wv1->words[i1++];
824   }
825
826   tl_assert(k == sz);
827
828   ws_new = add_or_dealloc_WordVec( wsu, wv_new );
829   if (sz == 0) {
830      tl_assert(ws_new == wsu->empty);
831   }
832
833   tl_assert(ws_new != (WordSet)(-1));
834   WCache_UPDATE(wsu->cache_minus, ws1, ws2, ws_new);
835
836   return ws_new;
837}
838
839static __attribute__((unused))
840void show_WS ( WordSetU* wsu, WordSet ws )
841{
842   UWord i;
843   WordVec* wv = do_ix2vec( wsu, ws );
844   VG_(printf)("#%u{", ws);
845   for (i = 0; i < wv->size; i++) {
846      VG_(printf)("%lu", wv->words[i]);
847      if (i < wv->size-1)
848         VG_(printf)(",");
849   }
850   VG_(printf)("}\n");
851}
852
853//------------------------------------------------------------------//
854//---                        end WordSet                         ---//
855//---                       Implementation                       ---//
856//------------------------------------------------------------------//
857
858/*--------------------------------------------------------------------*/
859/*--- end                                             hg_wordset.c ---*/
860/*--------------------------------------------------------------------*/
861