1/* -*- mode: C; c-basic-offset: 3; indent-tabs-mode: nil; -*- */
2/*
3  This file is part of drd, a thread error detector.
4
5  Copyright (C) 2006-2011 Bart Van Assche <bvanassche@acm.org>.
6
7  This program is free software; you can redistribute it and/or
8  modify it under the terms of the GNU General Public License as
9  published by the Free Software Foundation; either version 2 of the
10  License, or (at your option) any later version.
11
12  This program is distributed in the hope that it will be useful, but
13  WITHOUT ANY WARRANTY; without even the implied warranty of
14  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15  General Public License for more details.
16
17  You should have received a copy of the GNU General Public License
18  along with this program; if not, write to the Free Software
19  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
20  02111-1307, USA.
21
22  The GNU General Public License is contained in the file COPYING.
23*/
24
25
26#include "drd_barrier.h"
27#include "drd_clientobj.h"
28#include "drd_error.h"
29#include "drd_suppression.h"
30#include "pub_tool_errormgr.h"    // VG_(maybe_record_error)()
31#include "pub_tool_libcassert.h"  // tl_assert()
32#include "pub_tool_libcprint.h"   // VG_(printf)()
33#include "pub_tool_machine.h"     // VG_(get_IP)()
34#include "pub_tool_mallocfree.h"  // VG_(malloc)(), VG_(free)()
35#include "pub_tool_oset.h"
36#include "pub_tool_threadstate.h" // VG_(get_running_tid)()
37
38
39/* Type definitions. */
40
41/** Information associated with one thread participating in a barrier. */
42struct barrier_thread_info
43{
44   UWord       tid;           // A DrdThreadId declared as UWord because
45                              // this member variable is the key of an OSet.
46   Segment*    sg;            // Segment of the last pthread_barrier() call
47                              // by thread tid.
48   Segment*    post_wait_sg;  // Segment created after *_barrier_wait() finished
49   ExeContext* wait_call_ctxt;// call stack for *_barrier_wait() call.
50   Bool       thread_finished;// Whether thread 'tid' has finished.
51};
52
53
54/* Local functions. */
55
56static void barrier_cleanup(struct barrier_info* p);
57static void barrier_delete_thread(struct barrier_info* const p,
58                                  const DrdThreadId tid);
59static const char* barrier_get_typename(struct barrier_info* const p);
60static const char* barrier_type_name(const BarrierT bt);
61static
62void barrier_report_wait_delete_race(const struct barrier_info* const p,
63                                     const struct barrier_thread_info* const q);
64
65
66/* Local variables. */
67
68static Bool  s_trace_barrier = False;
69static ULong s_barrier_segment_creation_count;
70
71
72/* Function definitions. */
73
74void DRD_(barrier_set_trace)(const Bool trace_barrier)
75{
76   s_trace_barrier = trace_barrier;
77}
78
79/**
80 * Initialize the structure *p with the specified thread ID and iteration
81 * information.
82 */
83static
84void DRD_(barrier_thread_initialize)(struct barrier_thread_info* const p,
85                                     const DrdThreadId tid)
86{
87   p->tid             = tid;
88   p->sg              = NULL;
89   p->post_wait_sg    = 0;
90   p->wait_call_ctxt  = 0;
91   p->thread_finished = False;
92}
93
94/**
95 * Deallocate the memory that is owned by members of
96 * struct barrier_thread_info.
97 */
98static void DRD_(barrier_thread_destroy)(struct barrier_thread_info* const p)
99{
100   tl_assert(p);
101   DRD_(sg_put)(p->sg);
102   DRD_(sg_put)(p->post_wait_sg);
103}
104
105/**
106 * Initialize the structure *p with the specified client-side barrier address,
107 * barrier object size and number of participants in each barrier.
108 */
109static
110void DRD_(barrier_initialize)(struct barrier_info* const p,
111                              const Addr barrier,
112                              const BarrierT barrier_type,
113                              const Word count)
114{
115   int i;
116
117   tl_assert(barrier != 0);
118   tl_assert(barrier_type == pthread_barrier || barrier_type == gomp_barrier);
119   tl_assert(p->a1 == barrier);
120
121   p->cleanup           = (void(*)(DrdClientobj*))barrier_cleanup;
122   p->delete_thread
123      = (void(*)(DrdClientobj*, DrdThreadId))barrier_delete_thread;
124   p->barrier_type      = barrier_type;
125   p->count             = count;
126   p->pre_iteration     = 0;
127   p->post_iteration    = 0;
128   p->pre_waiters_left  = count;
129   p->post_waiters_left = count;
130
131   tl_assert(sizeof(((struct barrier_thread_info*)0)->tid) == sizeof(Word));
132   tl_assert(sizeof(((struct barrier_thread_info*)0)->tid)
133             >= sizeof(DrdThreadId));
134   for (i = 0; i < 2; i++) {
135      p->oset[i] = VG_(OSetGen_Create)(0, 0, VG_(malloc), "drd.barrier.bi.1",
136                                       VG_(free));
137   }
138}
139
140/**
141 * Deallocate the memory owned by the struct barrier_info object and also
142 * all the nodes in the OSet p->oset.
143 *
144 * Called by clientobj_destroy().
145 */
146static void barrier_cleanup(struct barrier_info* p)
147{
148   struct barrier_thread_info* q;
149   Segment* latest_sg = 0;
150   OSet* oset;
151   int i;
152
153   tl_assert(p);
154
155   DRD_(thread_get_latest_segment)(&latest_sg, DRD_(thread_get_running_tid)());
156   tl_assert(latest_sg);
157
158   if (p->pre_waiters_left != p->count) {
159      BarrierErrInfo bei = { DRD_(thread_get_running_tid)(), p->a1, 0, 0 };
160      VG_(maybe_record_error)(VG_(get_running_tid)(),
161                              BarrierErr,
162                              VG_(get_IP)(VG_(get_running_tid)()),
163                              "Destruction of barrier that is being waited"
164                              " upon",
165                              &bei);
166   } else {
167      oset = p->oset[1 - (p->pre_iteration & 1)];
168      VG_(OSetGen_ResetIter)(oset);
169      for ( ; (q = VG_(OSetGen_Next)(oset)) != 0; ) {
170         if (q->post_wait_sg && !DRD_(vc_lte)(&q->post_wait_sg->vc,
171                                              &latest_sg->vc))
172         {
173            barrier_report_wait_delete_race(p, q);
174         }
175         DRD_(barrier_thread_destroy)(q);
176      }
177   }
178
179   for (i = 0; i < 2; i++) {
180      VG_(OSetGen_Destroy)(p->oset[i]);
181      p->oset[i] = NULL;
182   }
183
184   DRD_(sg_put)(latest_sg);
185}
186
187/**
188 * Look up the client-side barrier address barrier in s_barrier[]. If not
189 * found, add it.
190 */
191static
192struct barrier_info*
193DRD_(barrier_get_or_allocate)(const Addr barrier,
194                              const BarrierT barrier_type, const Word count)
195{
196   struct barrier_info *p;
197
198   tl_assert(barrier_type == pthread_barrier || barrier_type == gomp_barrier);
199
200   tl_assert(offsetof(DrdClientobj, barrier) == 0);
201   p = &(DRD_(clientobj_get)(barrier, ClientBarrier)->barrier);
202   if (p == 0)
203   {
204      p = &(DRD_(clientobj_add)(barrier, ClientBarrier)->barrier);
205      DRD_(barrier_initialize)(p, barrier, barrier_type, count);
206   }
207   return p;
208}
209
210/**
211 * Look up the address of the struct barrier_info associated with the
212 * client-side barrier object.
213 */
214static struct barrier_info* DRD_(barrier_get)(const Addr barrier)
215{
216   tl_assert(offsetof(DrdClientobj, barrier) == 0);
217   return &(DRD_(clientobj_get)(barrier, ClientBarrier)->barrier);
218}
219
220/**
221 * Initialize a barrier with given client address, barrier type and number of
222 * participants. The 'reinitialization' argument indicates whether a barrier
223 * object is being initialized or reinitialized.
224 *
225 * Called before pthread_barrier_init().
226 */
227void DRD_(barrier_init)(const Addr barrier,
228                        const BarrierT barrier_type, const Word count,
229                        const Bool reinitialization)
230{
231   struct barrier_info* p;
232
233   tl_assert(barrier_type == pthread_barrier || barrier_type == gomp_barrier);
234
235   if (count == 0)
236   {
237      BarrierErrInfo bei = { DRD_(thread_get_running_tid)(), barrier, 0, 0 };
238      VG_(maybe_record_error)(VG_(get_running_tid)(),
239                              BarrierErr,
240                              VG_(get_IP)(VG_(get_running_tid)()),
241                              "pthread_barrier_init: 'count' argument is zero",
242                              &bei);
243   }
244
245   if (! reinitialization && barrier_type == pthread_barrier)
246   {
247      p = DRD_(barrier_get)(barrier);
248      if (p)
249      {
250         BarrierErrInfo bei = { DRD_(thread_get_running_tid)(), barrier, 0, 0 };
251         VG_(maybe_record_error)(VG_(get_running_tid)(),
252                                 BarrierErr,
253                                 VG_(get_IP)(VG_(get_running_tid)()),
254                                 "Barrier reinitialization",
255                                 &bei);
256      }
257   }
258
259   p = DRD_(barrier_get_or_allocate)(barrier, barrier_type, count);
260
261   if (s_trace_barrier) {
262      if (reinitialization)
263         DRD_(trace_msg)("[%d] barrier_reinit    %s 0x%lx count %ld -> %ld",
264                         DRD_(thread_get_running_tid)(),
265                         barrier_get_typename(p), barrier, p->count, count);
266      else
267         DRD_(trace_msg)("[%d] barrier_init      %s 0x%lx",
268                         DRD_(thread_get_running_tid)(),
269                         barrier_get_typename(p),
270                         barrier);
271   }
272
273   if (reinitialization && p->count != count)
274   {
275      if (p->pre_waiters_left != p->count || p->post_waiters_left != p->count)
276      {
277         BarrierErrInfo bei = { DRD_(thread_get_running_tid)(), p->a1, 0, 0 };
278         VG_(maybe_record_error)(VG_(get_running_tid)(),
279                                 BarrierErr,
280                                 VG_(get_IP)(VG_(get_running_tid)()),
281                                 "Reinitialization of barrier with active"
282                                 " waiters",
283                                 &bei);
284      }
285      p->count = count;
286   }
287}
288
289/** Called after pthread_barrier_destroy() / gomp_barrier_destroy(). */
290void DRD_(barrier_destroy)(const Addr barrier, const BarrierT barrier_type)
291{
292   struct barrier_info* p;
293
294   p = DRD_(barrier_get)(barrier);
295
296   if (s_trace_barrier)
297      DRD_(trace_msg)("[%d] barrier_destroy   %s 0x%lx",
298                      DRD_(thread_get_running_tid)(),
299                      barrier_get_typename(p), barrier);
300
301   if (p == 0)
302   {
303      GenericErrInfo GEI = {
304	 .tid = DRD_(thread_get_running_tid)(),
305	 .addr = barrier,
306      };
307      VG_(maybe_record_error)(VG_(get_running_tid)(),
308                              GenericErr,
309                              VG_(get_IP)(VG_(get_running_tid)()),
310                              "Not a barrier",
311                              &GEI);
312      return;
313   }
314
315   if (p->pre_waiters_left != p->count || p->post_waiters_left != p->count)
316   {
317      BarrierErrInfo bei = { DRD_(thread_get_running_tid)(), p->a1, 0, 0 };
318      VG_(maybe_record_error)(VG_(get_running_tid)(),
319                              BarrierErr,
320                              VG_(get_IP)(VG_(get_running_tid)()),
321                              "Destruction of a barrier with active waiters",
322                              &bei);
323   }
324
325   DRD_(clientobj_remove)(p->a1, ClientBarrier);
326}
327
328/** Called before pthread_barrier_wait() / gomp_barrier_wait(). */
329void DRD_(barrier_pre_wait)(const DrdThreadId tid, const Addr barrier,
330                            const BarrierT barrier_type)
331{
332   struct barrier_info* p;
333   struct barrier_thread_info* q;
334   const UWord word_tid = tid;
335   OSet* oset;
336
337   p = DRD_(barrier_get)(barrier);
338   if (p == 0 && barrier_type == gomp_barrier) {
339      /*
340       * gomp_barrier_wait() call has been intercepted but gomp_barrier_init()
341       * not. The only cause I know of that can trigger this is that libgomp.so
342       * has been compiled with --enable-linux-futex.
343       */
344      BarrierErrInfo bei = { DRD_(thread_get_running_tid)(), 0, 0, 0 };
345      VG_(maybe_record_error)(VG_(get_running_tid)(),
346                              BarrierErr,
347                              VG_(get_IP)(VG_(get_running_tid)()),
348                              "Please verify whether gcc has been configured"
349                              " with option --disable-linux-futex. See also"
350                              " the section about OpenMP in the DRD manual.",
351                              &bei);
352   }
353   tl_assert(p);
354
355   if (s_trace_barrier)
356      DRD_(trace_msg)("[%d] barrier_pre_wait  %s 0x%lx iteration %ld",
357                      DRD_(thread_get_running_tid)(),
358                      barrier_get_typename(p), barrier, p->pre_iteration);
359
360   /* Clean up nodes associated with finished threads. */
361   oset = p->oset[p->pre_iteration & 1];
362   tl_assert(oset);
363   VG_(OSetGen_ResetIter)(oset);
364   for ( ; (q = VG_(OSetGen_Next)(oset)) != 0; ) {
365      if (q->thread_finished) {
366         void* r = VG_(OSetGen_Remove)(oset, &q->tid);
367         tl_assert(r == q);
368         DRD_(barrier_thread_destroy)(q);
369         VG_(OSetGen_FreeNode)(oset, q);
370         VG_(OSetGen_ResetIterAt)(oset, &word_tid);
371      }
372   }
373   /* Allocate the per-thread data structure if necessary. */
374   q = VG_(OSetGen_Lookup)(oset, &word_tid);
375   if (q == NULL) {
376      q = VG_(OSetGen_AllocNode)(oset, sizeof(*q));
377      DRD_(barrier_thread_initialize)(q, tid);
378      VG_(OSetGen_Insert)(oset, q);
379      tl_assert(VG_(OSetGen_Lookup)(oset, &word_tid) == q);
380   }
381
382   /* Record *_barrier_wait() call context. */
383   q->wait_call_ctxt = VG_(record_ExeContext)(VG_(get_running_tid)(), 0);
384
385   /*
386    * Store a pointer to the latest segment of the current thread in the
387    * per-thread data structure.
388    */
389   DRD_(thread_get_latest_segment)(&q->sg, tid);
390
391   /*
392    * If the same number of threads as the barrier count indicates have
393    * called the pre *_barrier_wait() wrapper, toggle p->pre_iteration and
394    * reset the p->pre_waiters_left counter.
395    */
396   if (--p->pre_waiters_left <= 0)
397   {
398      p->pre_iteration++;
399      p->pre_waiters_left = p->count;
400   }
401}
402
403/** Called after pthread_barrier_wait() / gomp_barrier_wait(). */
404void DRD_(barrier_post_wait)(const DrdThreadId tid, const Addr barrier,
405                             const BarrierT barrier_type, const Bool waited,
406                             const Bool serializing)
407{
408   struct barrier_info* p;
409   const UWord word_tid = tid;
410   struct barrier_thread_info* q;
411   struct barrier_thread_info* r;
412   OSet* oset;
413
414   p = DRD_(barrier_get)(barrier);
415
416   if (s_trace_barrier)
417      DRD_(trace_msg)("[%d] barrier_post_wait %s 0x%lx iteration %ld%s",
418                      tid, p ? barrier_get_typename(p) : "(?)",
419                      barrier, p ? p->post_iteration : -1,
420                      serializing ? " (serializing)" : "");
421
422   /*
423    * If p == 0, this means that the barrier has been destroyed after
424    * *_barrier_wait() returned and before this function was called. Just
425    * return in that case -- race conditions between *_barrier_wait()
426    * and *_barrier_destroy() are detected by the *_barrier_destroy() wrapper.
427    */
428   if (p == 0)
429      return;
430
431   /* If the *_barrier_wait() call returned an error code, exit. */
432   if (! waited)
433      return;
434
435   oset = p->oset[p->post_iteration & 1];
436   q = VG_(OSetGen_Lookup)(oset, &word_tid);
437   if (p->pre_iteration - p->post_iteration > 1) {
438      BarrierErrInfo bei = { DRD_(thread_get_running_tid)(), p->a1, 0, 0 };
439      VG_(maybe_record_error)(VG_(get_running_tid)(),
440                              BarrierErr,
441                              VG_(get_IP)(VG_(get_running_tid)()),
442                              "Number of concurrent pthread_barrier_wait()"
443                              " calls exceeds the barrier count",
444                              &bei);
445   } else if (q == NULL) {
446      BarrierErrInfo bei = { DRD_(thread_get_running_tid)(), p->a1, 0, 0 };
447      VG_(maybe_record_error)(VG_(get_running_tid)(),
448                              BarrierErr,
449                              VG_(get_IP)(VG_(get_running_tid)()),
450                              "Error in barrier implementation"
451                              " -- barrier_wait() started before"
452                              " barrier_destroy() and finished after"
453                              " barrier_destroy()",
454                              &bei);
455   }
456   if (q == NULL) {
457      q = VG_(OSetGen_AllocNode)(oset, sizeof(*q));
458      DRD_(barrier_thread_initialize)(q, tid);
459      VG_(OSetGen_Insert)(oset, q);
460      tl_assert(VG_(OSetGen_Lookup)(oset, &word_tid) == q);
461      DRD_(thread_get_latest_segment)(&q->sg, tid);
462   }
463
464   /* Create a new segment and store a pointer to that segment. */
465   DRD_(thread_new_segment)(tid);
466   DRD_(thread_get_latest_segment)(&q->post_wait_sg, tid);
467   s_barrier_segment_creation_count++;
468
469   /*
470    * Combine all vector clocks that were stored in the pre_barrier_wait
471    * wrapper with the vector clock of the current thread.
472    */
473   {
474      VectorClock old_vc;
475
476      DRD_(vc_copy)(&old_vc, &DRD_(g_threadinfo)[tid].last->vc);
477      VG_(OSetGen_ResetIter)(oset);
478      for ( ; (r = VG_(OSetGen_Next)(oset)) != 0; )
479      {
480         if (r != q)
481         {
482            tl_assert(r->sg);
483            DRD_(vc_combine)(&DRD_(g_threadinfo)[tid].last->vc,
484                             &r->sg->vc);
485         }
486      }
487      DRD_(thread_update_conflict_set)(tid, &old_vc);
488      DRD_(vc_cleanup)(&old_vc);
489   }
490
491   /*
492    * If the same number of threads as the barrier count indicates have
493    * called the post *_barrier_wait() wrapper, toggle p->post_iteration and
494    * reset the p->post_waiters_left counter.
495    */
496   if (--p->post_waiters_left <= 0)
497   {
498      p->post_iteration++;
499      p->post_waiters_left = p->count;
500   }
501}
502
503/** Called when thread tid stops to exist. */
504static void barrier_delete_thread(struct barrier_info* const p,
505                                  const DrdThreadId tid)
506{
507   struct barrier_thread_info* q;
508   const UWord word_tid = tid;
509   int i;
510
511   for (i = 0; i < 2; i++) {
512      q = VG_(OSetGen_Lookup)(p->oset[i], &word_tid);
513      if (q)
514         q->thread_finished = True;
515   }
516}
517
518/**
519 * Report that *_barrier_destroy() has been called but that this call was
520 * not synchronized with the last *_barrier_wait() call on the same barrier.
521 *
522 * This topic has been discussed extensively on comp.programming.threads
523 * (February 3, 2009). See also
524 * <a href="http://groups.google.com/group/comp.programming.threads/browse_thread/thread/4f65535d6192aa50/a5f4bf1e3b437c4d">Immediately destroying pthread barriers</a>.
525 */
526static
527void barrier_report_wait_delete_race(const struct barrier_info* const p,
528                                     const struct barrier_thread_info* const q)
529{
530   tl_assert(p);
531   tl_assert(q);
532
533   {
534      BarrierErrInfo bei
535         = { DRD_(thread_get_running_tid)(), p->a1, q->tid, q->wait_call_ctxt };
536      VG_(maybe_record_error)(VG_(get_running_tid)(),
537                              BarrierErr,
538                              VG_(get_IP)(VG_(get_running_tid)()),
539                              "Destruction of barrier not synchronized with"
540                              " barrier wait call",
541                              &bei);
542   }
543}
544
545static const char* barrier_get_typename(struct barrier_info* const p)
546{
547   tl_assert(p);
548
549   return barrier_type_name(p->barrier_type);
550}
551
552static const char* barrier_type_name(const BarrierT bt)
553{
554   switch (bt)
555   {
556   case pthread_barrier:
557      return "pthread barrier";
558   case gomp_barrier:
559      return "gomp barrier";
560   }
561   return "?";
562}
563
564ULong DRD_(get_barrier_segment_creation_count)(void)
565{
566   return s_barrier_segment_creation_count;
567}
568