1/*
2  This file is part of drd, a thread error detector.
3
4  Copyright (C) 2006-2013 Bart Van Assche <bvanassche@acm.org>.
5
6  This program is free software; you can redistribute it and/or
7  modify it under the terms of the GNU General Public License as
8  published by the Free Software Foundation; either version 2 of the
9  License, or (at your option) any later version.
10
11  This program is distributed in the hope that it will be useful, but
12  WITHOUT ANY WARRANTY; without even the implied warranty of
13  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14  General Public License for more details.
15
16  You should have received a copy of the GNU General Public License
17  along with this program; if not, write to the Free Software
18  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
19  02111-1307, USA.
20
21  The GNU General Public License is contained in the file COPYING.
22*/
23
24
25#include "drd_barrier.h"
26#include "drd_clientobj.h"
27#include "drd_error.h"
28#include "drd_suppression.h"
29#include "pub_tool_errormgr.h"    // VG_(maybe_record_error)()
30#include "pub_tool_libcassert.h"  // tl_assert()
31#include "pub_tool_libcprint.h"   // VG_(printf)()
32#include "pub_tool_machine.h"     // VG_(get_IP)()
33#include "pub_tool_mallocfree.h"  // VG_(malloc)(), VG_(free)()
34#include "pub_tool_oset.h"
35#include "pub_tool_threadstate.h" // VG_(get_running_tid)()
36
37
38/* Type definitions. */
39
40/** Information associated with one thread participating in a barrier. */
41struct barrier_thread_info
42{
43   UWord       tid;           // A DrdThreadId declared as UWord because
44                              // this member variable is the key of an OSet.
45   Segment*    sg;            // Segment of the last pthread_barrier() call
46                              // by thread tid.
47   Segment*    post_wait_sg;  // Segment created after *_barrier_wait() finished
48   ExeContext* wait_call_ctxt;// call stack for *_barrier_wait() call.
49   Bool       thread_finished;// Whether thread 'tid' has finished.
50};
51
52
53/* Local functions. */
54
55static void barrier_cleanup(struct barrier_info* p);
56static void barrier_delete_thread(struct barrier_info* const p,
57                                  const DrdThreadId tid);
58static const HChar* barrier_get_typename(struct barrier_info* const p);
59static const HChar* barrier_type_name(const BarrierT bt);
60static
61void barrier_report_wait_delete_race(const struct barrier_info* const p,
62                                     const struct barrier_thread_info* const q);
63
64
65/* Local variables. */
66
67static Bool  s_trace_barrier = False;
68static ULong s_barrier_segment_creation_count;
69
70
71/* Function definitions. */
72
73void DRD_(barrier_set_trace)(const Bool trace_barrier)
74{
75   s_trace_barrier = trace_barrier;
76}
77
78/**
79 * Initialize the structure *p with the specified thread ID and iteration
80 * information.
81 */
82static
83void DRD_(barrier_thread_initialize)(struct barrier_thread_info* const p,
84                                     const DrdThreadId tid)
85{
86   p->tid             = tid;
87   p->sg              = NULL;
88   p->post_wait_sg    = 0;
89   p->wait_call_ctxt  = 0;
90   p->thread_finished = False;
91}
92
93/**
94 * Deallocate the memory that is owned by members of
95 * struct barrier_thread_info.
96 */
97static void DRD_(barrier_thread_destroy)(struct barrier_thread_info* const p)
98{
99   tl_assert(p);
100   DRD_(sg_put)(p->sg);
101   DRD_(sg_put)(p->post_wait_sg);
102}
103
104/**
105 * Initialize the structure *p with the specified client-side barrier address,
106 * barrier object size and number of participants in each barrier.
107 */
108static
109void DRD_(barrier_initialize)(struct barrier_info* const p,
110                              const Addr barrier,
111                              const BarrierT barrier_type,
112                              const Word count)
113{
114   int i;
115
116   tl_assert(barrier != 0);
117   tl_assert(barrier_type == pthread_barrier || barrier_type == gomp_barrier);
118   tl_assert(p->a1 == barrier);
119
120   p->cleanup           = (void(*)(DrdClientobj*))barrier_cleanup;
121   p->delete_thread
122      = (void(*)(DrdClientobj*, DrdThreadId))barrier_delete_thread;
123   p->barrier_type      = barrier_type;
124   p->count             = count;
125   p->pre_iteration     = 0;
126   p->post_iteration    = 0;
127   p->pre_waiters_left  = count;
128   p->post_waiters_left = count;
129
130   tl_assert(sizeof(((struct barrier_thread_info*)0)->tid) == sizeof(Word));
131   tl_assert(sizeof(((struct barrier_thread_info*)0)->tid)
132             >= sizeof(DrdThreadId));
133   for (i = 0; i < 2; i++) {
134      p->oset[i] = VG_(OSetGen_Create)(0, 0, VG_(malloc), "drd.barrier.bi.1",
135                                       VG_(free));
136   }
137}
138
139/**
140 * Deallocate the memory owned by the struct barrier_info object and also
141 * all the nodes in the OSet p->oset.
142 *
143 * Called by clientobj_destroy().
144 */
145static void barrier_cleanup(struct barrier_info* p)
146{
147   struct barrier_thread_info* q;
148   Segment* latest_sg = 0;
149   OSet* oset;
150   int i;
151
152   tl_assert(p);
153
154   DRD_(thread_get_latest_segment)(&latest_sg, DRD_(thread_get_running_tid)());
155   tl_assert(latest_sg);
156
157   if (p->pre_waiters_left != p->count) {
158      BarrierErrInfo bei = { DRD_(thread_get_running_tid)(), p->a1, 0, 0 };
159      VG_(maybe_record_error)(VG_(get_running_tid)(),
160                              BarrierErr,
161                              VG_(get_IP)(VG_(get_running_tid)()),
162                              "Destruction of barrier that is being waited"
163                              " upon",
164                              &bei);
165   } else {
166      oset = p->oset[1 - (p->pre_iteration & 1)];
167      VG_(OSetGen_ResetIter)(oset);
168      for ( ; (q = VG_(OSetGen_Next)(oset)) != 0; ) {
169         if (q->post_wait_sg && !DRD_(vc_lte)(&q->post_wait_sg->vc,
170                                              &latest_sg->vc))
171         {
172            barrier_report_wait_delete_race(p, q);
173         }
174         DRD_(barrier_thread_destroy)(q);
175      }
176   }
177
178   for (i = 0; i < 2; i++) {
179      VG_(OSetGen_Destroy)(p->oset[i]);
180      p->oset[i] = NULL;
181   }
182
183   DRD_(sg_put)(latest_sg);
184}
185
186/**
187 * Look up the client-side barrier address barrier in s_barrier[]. If not
188 * found, add it.
189 */
190static
191struct barrier_info*
192DRD_(barrier_get_or_allocate)(const Addr barrier,
193                              const BarrierT barrier_type, const Word count)
194{
195   struct barrier_info *p;
196
197   tl_assert(barrier_type == pthread_barrier || barrier_type == gomp_barrier);
198
199   tl_assert(offsetof(DrdClientobj, barrier) == 0);
200   p = &(DRD_(clientobj_get)(barrier, ClientBarrier)->barrier);
201   if (p == 0)
202   {
203      p = &(DRD_(clientobj_add)(barrier, ClientBarrier)->barrier);
204      DRD_(barrier_initialize)(p, barrier, barrier_type, count);
205   }
206   return p;
207}
208
209/**
210 * Look up the address of the struct barrier_info associated with the
211 * client-side barrier object.
212 */
213static struct barrier_info* DRD_(barrier_get)(const Addr barrier)
214{
215   tl_assert(offsetof(DrdClientobj, barrier) == 0);
216   return &(DRD_(clientobj_get)(barrier, ClientBarrier)->barrier);
217}
218
219/**
220 * Initialize a barrier with given client address, barrier type and number of
221 * participants. The 'reinitialization' argument indicates whether a barrier
222 * object is being initialized or reinitialized.
223 *
224 * Called before pthread_barrier_init().
225 */
226void DRD_(barrier_init)(const Addr barrier,
227                        const BarrierT barrier_type, const Word count,
228                        const Bool reinitialization)
229{
230   struct barrier_info* p;
231
232   tl_assert(barrier_type == pthread_barrier || barrier_type == gomp_barrier);
233
234   if (count == 0)
235   {
236      BarrierErrInfo bei = { DRD_(thread_get_running_tid)(), barrier, 0, 0 };
237      VG_(maybe_record_error)(VG_(get_running_tid)(),
238                              BarrierErr,
239                              VG_(get_IP)(VG_(get_running_tid)()),
240                              "pthread_barrier_init: 'count' argument is zero",
241                              &bei);
242   }
243
244   if (! reinitialization && barrier_type == pthread_barrier)
245   {
246      p = DRD_(barrier_get)(barrier);
247      if (p)
248      {
249         BarrierErrInfo bei = { DRD_(thread_get_running_tid)(), barrier, 0, 0 };
250         VG_(maybe_record_error)(VG_(get_running_tid)(),
251                                 BarrierErr,
252                                 VG_(get_IP)(VG_(get_running_tid)()),
253                                 "Barrier reinitialization",
254                                 &bei);
255      }
256   }
257
258   p = DRD_(barrier_get_or_allocate)(barrier, barrier_type, count);
259
260   if (s_trace_barrier) {
261      if (reinitialization)
262         DRD_(trace_msg)("[%d] barrier_reinit    %s 0x%lx count %ld -> %ld",
263                         DRD_(thread_get_running_tid)(),
264                         barrier_get_typename(p), barrier, p->count, count);
265      else
266         DRD_(trace_msg)("[%d] barrier_init      %s 0x%lx",
267                         DRD_(thread_get_running_tid)(),
268                         barrier_get_typename(p),
269                         barrier);
270   }
271
272   if (reinitialization && p->count != count)
273   {
274      if (p->pre_waiters_left != p->count || p->post_waiters_left != p->count)
275      {
276         BarrierErrInfo bei = { DRD_(thread_get_running_tid)(), p->a1, 0, 0 };
277         VG_(maybe_record_error)(VG_(get_running_tid)(),
278                                 BarrierErr,
279                                 VG_(get_IP)(VG_(get_running_tid)()),
280                                 "Reinitialization of barrier with active"
281                                 " waiters",
282                                 &bei);
283      }
284      p->count = count;
285   }
286}
287
288/** Called after pthread_barrier_destroy() / gomp_barrier_destroy(). */
289void DRD_(barrier_destroy)(const Addr barrier, const BarrierT barrier_type)
290{
291   struct barrier_info* p;
292
293   p = DRD_(barrier_get)(barrier);
294
295   if (s_trace_barrier)
296      DRD_(trace_msg)("[%d] barrier_destroy   %s 0x%lx",
297                      DRD_(thread_get_running_tid)(),
298                      barrier_get_typename(p), barrier);
299
300   if (p == 0)
301   {
302      GenericErrInfo GEI = {
303	 .tid = DRD_(thread_get_running_tid)(),
304	 .addr = barrier,
305      };
306      VG_(maybe_record_error)(VG_(get_running_tid)(),
307                              GenericErr,
308                              VG_(get_IP)(VG_(get_running_tid)()),
309                              "Not a barrier",
310                              &GEI);
311      return;
312   }
313
314   if (p->pre_waiters_left != p->count || p->post_waiters_left != p->count)
315   {
316      BarrierErrInfo bei = { DRD_(thread_get_running_tid)(), p->a1, 0, 0 };
317      VG_(maybe_record_error)(VG_(get_running_tid)(),
318                              BarrierErr,
319                              VG_(get_IP)(VG_(get_running_tid)()),
320                              "Destruction of a barrier with active waiters",
321                              &bei);
322   }
323
324   DRD_(clientobj_remove)(p->a1, ClientBarrier);
325}
326
327/** Called before pthread_barrier_wait() / gomp_barrier_wait(). */
328void DRD_(barrier_pre_wait)(const DrdThreadId tid, const Addr barrier,
329                            const BarrierT barrier_type)
330{
331   struct barrier_info* p;
332   struct barrier_thread_info* q;
333   const UWord word_tid = tid;
334   OSet* oset;
335
336   p = DRD_(barrier_get)(barrier);
337   if (p == 0 && barrier_type == gomp_barrier) {
338      /*
339       * gomp_barrier_wait() call has been intercepted but gomp_barrier_init()
340       * not. The only cause I know of that can trigger this is that libgomp.so
341       * has been compiled with --enable-linux-futex.
342       */
343      BarrierErrInfo bei = { DRD_(thread_get_running_tid)(), 0, 0, 0 };
344      VG_(maybe_record_error)(VG_(get_running_tid)(),
345                              BarrierErr,
346                              VG_(get_IP)(VG_(get_running_tid)()),
347                              "Please verify whether gcc has been configured"
348                              " with option --disable-linux-futex. See also"
349                              " the section about OpenMP in the DRD manual.",
350                              &bei);
351   }
352   tl_assert(p);
353
354   if (s_trace_barrier)
355      DRD_(trace_msg)("[%d] barrier_pre_wait  %s 0x%lx iteration %ld",
356                      DRD_(thread_get_running_tid)(),
357                      barrier_get_typename(p), barrier, p->pre_iteration);
358
359   /* Clean up nodes associated with finished threads. */
360   oset = p->oset[p->pre_iteration & 1];
361   tl_assert(oset);
362   VG_(OSetGen_ResetIter)(oset);
363   for ( ; (q = VG_(OSetGen_Next)(oset)) != 0; ) {
364      if (q->thread_finished) {
365         void* r = VG_(OSetGen_Remove)(oset, &q->tid);
366         tl_assert(r == q);
367         DRD_(barrier_thread_destroy)(q);
368         VG_(OSetGen_FreeNode)(oset, q);
369         VG_(OSetGen_ResetIterAt)(oset, &word_tid);
370      }
371   }
372   /* Allocate the per-thread data structure if necessary. */
373   q = VG_(OSetGen_Lookup)(oset, &word_tid);
374   if (q == NULL) {
375      q = VG_(OSetGen_AllocNode)(oset, sizeof(*q));
376      DRD_(barrier_thread_initialize)(q, tid);
377      VG_(OSetGen_Insert)(oset, q);
378      tl_assert(VG_(OSetGen_Lookup)(oset, &word_tid) == q);
379   }
380
381   /* Record *_barrier_wait() call context. */
382   q->wait_call_ctxt = VG_(record_ExeContext)(VG_(get_running_tid)(), 0);
383
384   /*
385    * Store a pointer to the latest segment of the current thread in the
386    * per-thread data structure.
387    */
388   DRD_(thread_get_latest_segment)(&q->sg, tid);
389
390   /*
391    * If the same number of threads as the barrier count indicates have
392    * called the pre *_barrier_wait() wrapper, toggle p->pre_iteration and
393    * reset the p->pre_waiters_left counter.
394    */
395   if (--p->pre_waiters_left <= 0)
396   {
397      p->pre_iteration++;
398      p->pre_waiters_left = p->count;
399   }
400}
401
402/** Called after pthread_barrier_wait() / gomp_barrier_wait(). */
403void DRD_(barrier_post_wait)(const DrdThreadId tid, const Addr barrier,
404                             const BarrierT barrier_type, const Bool waited,
405                             const Bool serializing)
406{
407   struct barrier_info* p;
408   const UWord word_tid = tid;
409   struct barrier_thread_info* q;
410   struct barrier_thread_info* r;
411   OSet* oset;
412
413   p = DRD_(barrier_get)(barrier);
414
415   if (s_trace_barrier)
416      DRD_(trace_msg)("[%d] barrier_post_wait %s 0x%lx iteration %ld%s",
417                      tid, p ? barrier_get_typename(p) : "(?)",
418                      barrier, p ? p->post_iteration : -1,
419                      serializing ? " (serializing)" : "");
420
421   /*
422    * If p == 0, this means that the barrier has been destroyed after
423    * *_barrier_wait() returned and before this function was called. Just
424    * return in that case -- race conditions between *_barrier_wait()
425    * and *_barrier_destroy() are detected by the *_barrier_destroy() wrapper.
426    */
427   if (p == 0)
428      return;
429
430   /* If the *_barrier_wait() call returned an error code, exit. */
431   if (! waited)
432      return;
433
434   oset = p->oset[p->post_iteration & 1];
435   q = VG_(OSetGen_Lookup)(oset, &word_tid);
436   if (p->pre_iteration - p->post_iteration > 1) {
437      BarrierErrInfo bei = { DRD_(thread_get_running_tid)(), p->a1, 0, 0 };
438      VG_(maybe_record_error)(VG_(get_running_tid)(),
439                              BarrierErr,
440                              VG_(get_IP)(VG_(get_running_tid)()),
441                              "Number of concurrent pthread_barrier_wait()"
442                              " calls exceeds the barrier count",
443                              &bei);
444   } else if (q == NULL) {
445      BarrierErrInfo bei = { DRD_(thread_get_running_tid)(), p->a1, 0, 0 };
446      VG_(maybe_record_error)(VG_(get_running_tid)(),
447                              BarrierErr,
448                              VG_(get_IP)(VG_(get_running_tid)()),
449                              "Error in barrier implementation"
450                              " -- barrier_wait() started before"
451                              " barrier_destroy() and finished after"
452                              " barrier_destroy()",
453                              &bei);
454   }
455   if (q == NULL) {
456      q = VG_(OSetGen_AllocNode)(oset, sizeof(*q));
457      DRD_(barrier_thread_initialize)(q, tid);
458      VG_(OSetGen_Insert)(oset, q);
459      tl_assert(VG_(OSetGen_Lookup)(oset, &word_tid) == q);
460      DRD_(thread_get_latest_segment)(&q->sg, tid);
461   }
462
463   /* Create a new segment and store a pointer to that segment. */
464   DRD_(thread_new_segment)(tid);
465   DRD_(thread_get_latest_segment)(&q->post_wait_sg, tid);
466   s_barrier_segment_creation_count++;
467
468   /*
469    * Combine all vector clocks that were stored in the pre_barrier_wait
470    * wrapper with the vector clock of the current thread.
471    */
472   {
473      VectorClock old_vc;
474
475      DRD_(vc_copy)(&old_vc, DRD_(thread_get_vc)(tid));
476      VG_(OSetGen_ResetIter)(oset);
477      for ( ; (r = VG_(OSetGen_Next)(oset)) != 0; )
478      {
479         if (r != q)
480         {
481            tl_assert(r->sg);
482            DRD_(vc_combine)(DRD_(thread_get_vc)(tid), &r->sg->vc);
483         }
484      }
485      DRD_(thread_update_conflict_set)(tid, &old_vc);
486      DRD_(vc_cleanup)(&old_vc);
487   }
488
489   /*
490    * If the same number of threads as the barrier count indicates have
491    * called the post *_barrier_wait() wrapper, toggle p->post_iteration and
492    * reset the p->post_waiters_left counter.
493    */
494   if (--p->post_waiters_left <= 0)
495   {
496      p->post_iteration++;
497      p->post_waiters_left = p->count;
498   }
499}
500
501/** Called when thread tid stops to exist. */
502static void barrier_delete_thread(struct barrier_info* const p,
503                                  const DrdThreadId tid)
504{
505   struct barrier_thread_info* q;
506   const UWord word_tid = tid;
507   int i;
508
509   for (i = 0; i < 2; i++) {
510      q = VG_(OSetGen_Lookup)(p->oset[i], &word_tid);
511      if (q)
512         q->thread_finished = True;
513   }
514}
515
516/**
517 * Report that *_barrier_destroy() has been called but that this call was
518 * not synchronized with the last *_barrier_wait() call on the same barrier.
519 *
520 * This topic has been discussed extensively on comp.programming.threads
521 * (February 3, 2009). See also
522 * <a href="http://groups.google.com/group/comp.programming.threads/browse_thread/thread/4f65535d6192aa50/a5f4bf1e3b437c4d">Immediately destroying pthread barriers</a>.
523 */
524static
525void barrier_report_wait_delete_race(const struct barrier_info* const p,
526                                     const struct barrier_thread_info* const q)
527{
528   tl_assert(p);
529   tl_assert(q);
530
531   {
532      BarrierErrInfo bei
533         = { DRD_(thread_get_running_tid)(), p->a1, q->tid, q->wait_call_ctxt };
534      VG_(maybe_record_error)(VG_(get_running_tid)(),
535                              BarrierErr,
536                              VG_(get_IP)(VG_(get_running_tid)()),
537                              "Destruction of barrier not synchronized with"
538                              " barrier wait call",
539                              &bei);
540   }
541}
542
543static const HChar* barrier_get_typename(struct barrier_info* const p)
544{
545   tl_assert(p);
546
547   return barrier_type_name(p->barrier_type);
548}
549
550static const HChar* barrier_type_name(const BarrierT bt)
551{
552   switch (bt)
553   {
554   case pthread_barrier:
555      return "pthread barrier";
556   case gomp_barrier:
557      return "gomp barrier";
558   }
559   return "?";
560}
561
562ULong DRD_(get_barrier_segment_creation_count)(void)
563{
564   return s_barrier_segment_creation_count;
565}
566