libmpiwrap.c revision 9bea4c13fca0e3bb4b719dcb3ed63d47d479294e
1
2/*---------------------------------------------------------------*/
3/*---                                                         ---*/
4/*--- A library of wrappers for MPI 2 functions.              ---*/
5/*---                                                         ---*/
6/*---------------------------------------------------------------*/
7
8/* ----------------------------------------------------------------
9
10   Notice that the following BSD-style license applies to this one
11   file (mpiwrap.c) only.  The rest of Valgrind is licensed under the
12   terms of the GNU General Public License, version 2, unless
13   otherwise indicated.  See the COPYING file in the source
14   distribution for details.
15
16   ----------------------------------------------------------------
17
18   This file is part of Valgrind, a dynamic binary instrumentation
19   framework.
20
21   Copyright (C) 2006-2010 OpenWorks LLP.  All rights reserved.
22
23   Redistribution and use in source and binary forms, with or without
24   modification, are permitted provided that the following conditions
25   are met:
26
27   1. Redistributions of source code must retain the above copyright
28      notice, this list of conditions and the following disclaimer.
29
30   2. The origin of this software must not be misrepresented; you must
31      not claim that you wrote the original software.  If you use this
32      software in a product, an acknowledgment in the product
33      documentation would be appreciated but is not required.
34
35   3. Altered source versions must be plainly marked as such, and must
36      not be misrepresented as being the original software.
37
38   4. The name of the author may not be used to endorse or promote
39      products derived from this software without specific prior written
40      permission.
41
42   THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
43   OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
44   WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
45   ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
46   DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
47   DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
48   GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
49   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
50   WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
51   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
52   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
53
54   Neither the names of the U.S. Department of Energy nor the
55   University of California nor the names of its contributors may be
56   used to endorse or promote products derived from this software
57   without prior written permission.
58*/
59
60/* Handling of MPI_STATUS{ES}_IGNORE for MPI_Status* arguments.
61
62   The MPI-2 spec allows many functions which have MPI_Status* purely
63   as an out parameter, to accept the constants MPI_STATUS_IGNORE or
64   MPI_STATUSES_IGNORE there instead, if the caller does not care
65   about the status.  See the MPI-2 spec sec 4.5.1 ("Passing
66   MPI_STATUS_IGNORE for Status").  (mpi2-report.pdf, 1615898 bytes,
67   md5=694a5efe2fd291eecf7e8c9875b5f43f).
68
69   This library handles such cases by allocating a fake MPI_Status
70   object (on the stack) or an array thereof (on the heap), and
71   passing that onwards instead.  From the outside the caller sees no
72   difference.  Unfortunately the simpler approach of merely detecting
73   and handling these special cases at a lower level does not work,
74   because we need to use information returned in MPI_Status*
75   arguments to paint result buffers, even if the caller doesn't
76   supply a real MPI_Status object.
77
78   Eg, MPI_Recv.  We can't paint the result buffer without knowing how
79   many items arrived; but we can't find that out without passing a
80   real MPI_Status object to the (real) MPI_Recv call.  Hence, if the
81   caller did not supply one, we have no option but to use a temporary
82   stack allocated one for the inner call.  Ditto, more indirectly
83   (via maybe_complete) for nonblocking receives and the various
84   associated wait/test calls. */
85
86
87/*------------------------------------------------------------*/
88/*--- includes                                             ---*/
89/*------------------------------------------------------------*/
90
91#include <stdio.h>
92#include <assert.h>
93#include <unistd.h>     /* getpid */
94#include <stdlib.h>     /* exit */
95#include <string.h>     /* strstr */
96#include <pthread.h>    /* pthread_mutex_{lock,unlock} */
97
98/* Include Valgrind magic macros for writing wrappers. */
99#include "../memcheck/memcheck.h"
100
101
102/*------------------------------------------------------------*/
103/*--- Connect to MPI library                               ---*/
104/*------------------------------------------------------------*/
105
106/* Include headers for whatever MPI implementation the wrappers are to
107   be used with.  The configure system will tell us what the path to
108   the chosen MPI implementation is, via -I.. to the compiler. */
109#include "mpi.h"
110
111/* Where are API symbols?
112   Open MPI      lib/libmpi.so,   soname = libmpi.so.0
113   Quadrics MPI  lib/libmpi.so,   soname = libmpi.so.0
114   MPICH         libmpich.so.1.0, soname = libmpich.so.1.0
115   AIX: in /usr/lpp/ppe.poe/lib/libmpi_r.a(mpicore*_r.o)
116
117   For the non-AIX targets, a suitable soname to match with
118   is "libmpi*.so*".
119*/
120#if defined(_AIX)
121# define I_WRAP_FNNAME_U(_name) \
122         I_WRAP_SONAME_FNNAME_ZU(libmpiZurZdaZLmpicoreZaZurZdoZR,_name)
123  /* Don't change this without also changing all the names in
124     libmpiwrap.exp. */
125#else
126# define I_WRAP_FNNAME_U(_name) \
127         I_WRAP_SONAME_FNNAME_ZU(libmpiZaZdsoZa,_name)
128
129#endif
130
131
132/* Define HAVE_MPI_STATUS_IGNORE iff we have to deal with
133   MPI_STATUS{ES}_IGNORE. */
134#if MPI_VERSION >= 2 \
135    || (defined(MPI_STATUS_IGNORE) && defined(MPI_STATUSES_IGNORE))
136#  undef HAVE_MPI_STATUS_IGNORE
137#  define HAVE_MPI_STATUS_IGNORE 1
138#else
139#  undef HAVE_MPI_STATUS_IGNORE
140#endif
141
142
143/*------------------------------------------------------------*/
144/*--- Decls                                                ---*/
145/*------------------------------------------------------------*/
146
147typedef  unsigned char  Bool;
148#define False ((Bool)0)
149#define True  ((Bool)1)
150
151/* Word, UWord are machine words - same size as a pointer.  This is
152   checked at startup.  The wrappers below use 'long' to mean a
153   machine word - this too is tested at startup. */
154typedef    signed long  Word;
155typedef  unsigned long  UWord;
156
157#if !defined(offsetof)
158#  define offsetof(type,memb) ((int)&((type*)0)->memb)
159#endif
160
161/* Find the size of long double image (not 'sizeof(long double)').
162   See comments in sizeofOneNamedTy. */
163static long sizeof_long_double_image ( void );
164
165
166/*------------------------------------------------------------*/
167/*--- Simple helpers                                       ---*/
168/*------------------------------------------------------------*/
169
170/* ------ Helpers for debug printing ------ */
171
172/* constant */
173static const char* preamble = "valgrind MPI wrappers";
174
175/* established at startup */
176static pid_t my_pid         = -1;
177static char* options_str    = NULL;
178static int   opt_verbosity  = 1;
179static Bool  opt_missing    = 0; /* 0:silent; 1:warn; 2:abort */
180static Bool  opt_help       = False;
181static Bool  opt_initkludge = False;
182
183static void before ( char* fnname )
184{
185   /* This isn't thread-safe wrt 'done' (no locking).  It's not
186      critical. */
187   static int done = 0;
188   if (done == 0) {
189      done = 1;
190      my_pid = getpid();
191      options_str = getenv("MPIWRAP_DEBUG");
192      if (options_str) {
193         if (NULL != strstr(options_str, "warn"))
194            opt_missing = 1;
195         if (NULL != strstr(options_str, "strict"))
196            opt_missing = 2;
197         if (NULL != strstr(options_str, "verbose"))
198            opt_verbosity++;
199         if (NULL != strstr(options_str, "quiet"))
200            opt_verbosity--;
201         if (NULL != strstr(options_str, "help"))
202            opt_help = True;
203         if (NULL != strstr(options_str, "initkludge"))
204            opt_initkludge = True;
205      }
206      if (opt_verbosity > 0)
207         fprintf(stderr, "%s %5d: Active for pid %d\n",
208                         preamble, my_pid, my_pid);
209      /* Sanity check - that Word/UWord really are machine words. */
210      assert(sizeof(Word)  == sizeof(void*));
211      assert(sizeof(UWord) == sizeof(void*));
212      /* Sanity check - char is byte-sized (else address calculations
213         in walk_type don't work. */
214      assert(sizeof(char) == 1);
215      if (opt_help) {
216         fprintf(stderr, "\n");
217         fprintf(stderr, "Valid options for the MPIWRAP_DEBUG environment"
218                         " variable are:\n");
219         fprintf(stderr, "\n");
220         fprintf(stderr, "   quiet       be silent except for errors\n");
221         fprintf(stderr, "   verbose     show wrapper entries/exits\n");
222         fprintf(stderr, "   strict      abort the program if a function"
223                         " with no wrapper is used\n");
224         fprintf(stderr, "   warn        give a warning if a function"
225                         " with no wrapper is used\n");
226         fprintf(stderr, "   help        display this message, then exit\n");
227         fprintf(stderr, "   initkludge  debugging hack; do not use\n");
228         fprintf(stderr, "\n");
229         fprintf(stderr, "Multiple options are allowed, eg"
230                         " MPIWRAP_DEBUG=strict,verbose\n");
231         fprintf(stderr, "Note: 'warn' generates output even if 'quiet'"
232                         " is also specified\n");
233         fprintf(stderr, "\n");
234         fprintf(stderr, "%s %5d: exiting now\n", preamble, my_pid );
235         exit(1);
236      }
237      if (opt_verbosity > 0)
238         fprintf(stderr,
239                 "%s %5d: Try MPIWRAP_DEBUG=help for possible options\n",
240                 preamble, my_pid);
241
242   }
243   if (opt_verbosity > 1)
244      fprintf(stderr, "%s %5d: enter PMPI_%s\n", preamble,  my_pid, fnname );
245}
246
247static __inline__ void after ( char* fnname, int err )
248{
249   if (opt_verbosity > 1)
250      fprintf(stderr, "%s %5d:  exit PMPI_%s (err = %d)\n",
251                      preamble, my_pid, fnname, err );
252}
253
254static void barf ( char* msg )
255{
256   fprintf(stderr, "%s %5d: fatal: %s\n",   preamble, my_pid, msg);
257   fprintf(stderr, "%s %5d: exiting now\n", preamble, my_pid );
258   exit(1);
259}
260
261/* Half-hearted type-showing function (for debugging). */
262static void showTy ( FILE* f, MPI_Datatype ty )
263{
264        if (ty == MPI_DATATYPE_NULL)  fprintf(f,"DATATYPE_NULL");
265   else if (ty == MPI_BYTE)           fprintf(f,"BYTE");
266   else if (ty == MPI_PACKED)         fprintf(f,"PACKED");
267   else if (ty == MPI_CHAR)           fprintf(f,"CHAR");
268   else if (ty == MPI_SHORT)          fprintf(f,"SHORT");
269   else if (ty == MPI_INT)            fprintf(f,"INT");
270   else if (ty == MPI_LONG)           fprintf(f,"LONG");
271   else if (ty == MPI_FLOAT)          fprintf(f,"FLOAT");
272   else if (ty == MPI_DOUBLE)         fprintf(f,"DOUBLE");
273   else if (ty == MPI_LONG_DOUBLE)    fprintf(f,"LONG_DOUBLE");
274   else if (ty == MPI_UNSIGNED_CHAR)  fprintf(f,"UNSIGNED_CHAR");
275   else if (ty == MPI_UNSIGNED_SHORT) fprintf(f,"UNSIGNED_SHORT");
276   else if (ty == MPI_UNSIGNED_LONG)  fprintf(f,"UNSIGNED_LONG");
277   else if (ty == MPI_UNSIGNED)       fprintf(f,"UNSIGNED");
278   else if (ty == MPI_FLOAT_INT)      fprintf(f,"FLOAT_INT");
279   else if (ty == MPI_DOUBLE_INT)     fprintf(f,"DOUBLE_INT");
280   else if (ty == MPI_LONG_DOUBLE_INT) fprintf(f,"LONG_DOUBLE_INT");
281   else if (ty == MPI_LONG_INT)       fprintf(f,"LONG_INT");
282   else if (ty == MPI_SHORT_INT)      fprintf(f,"SHORT_INT");
283   else if (ty == MPI_2INT)           fprintf(f,"2INT");
284   else if (ty == MPI_UB)             fprintf(f,"UB");
285   else if (ty == MPI_LB)             fprintf(f,"LB");
286#  if defined(MPI_WCHAR)
287   else if (ty == MPI_WCHAR)          fprintf(f,"WCHAR");
288#  endif
289   else if (ty == MPI_LONG_LONG_INT)  fprintf(f,"LONG_LONG_INT");
290#  if defined(MPI_LONG_LONG)
291   else if (ty == MPI_LONG_LONG)      fprintf(f,"LONG_LONG");
292#  endif
293#  if defined(MPI_UNSIGNED_LONG_LONG)
294   else if (ty == MPI_UNSIGNED_LONG_LONG) fprintf(f,"UNSIGNED_LONG_LONG");
295#  endif
296#  if defined(MPI_REAL8)
297   else if (ty == MPI_REAL8)          fprintf(f, "REAL8");
298#  endif
299#  if defined(MPI_REAL4)
300   else if (ty == MPI_REAL4)          fprintf(f, "REAL4");
301#  endif
302#  if defined(MPI_REAL)
303   else if (ty == MPI_REAL)           fprintf(f, "REAL");
304#  endif
305#  if defined(MPI_INTEGER8)
306   else if (ty == MPI_INTEGER8)       fprintf(f, "INTEGER8");
307#  endif
308#  if defined(MPI_INTEGER4)
309   else if (ty == MPI_INTEGER4)       fprintf(f, "INTEGER4");
310#  endif
311#  if defined(MPI_INTEGER)
312   else if (ty == MPI_INTEGER)        fprintf(f, "INTEGER");
313#  endif
314#  if defined(MPI_DOUBLE_PRECISION)
315   else if (ty == MPI_DOUBLE_PRECISION) fprintf(f, "DOUBLE_PRECISION");
316#  endif
317#  if defined(MPI_COMPLEX)
318   else if (ty == MPI_COMPLEX)          fprintf(f, "COMPLEX");
319#  endif
320#  if defined(MPI_DOUBLE_COMPLEX)
321   else if (ty == MPI_DOUBLE_COMPLEX)   fprintf(f, "DOUBLE_COMPLEX");
322#  endif
323#  if defined(MPI_LOGICAL)
324   else if (ty == MPI_LOGICAL)          fprintf(f, "LOGICAL");
325#  endif
326#  if defined(MPI_2INTEGER)
327   else if (ty == MPI_2INTEGER)         fprintf(f, "2INTEGER");
328#  endif
329#  if defined(MPI_2COMPLEX)
330   else if (ty == MPI_2COMPLEX)         fprintf(f, "2COMPLEX");
331#  endif
332#  if defined(MPI_2DOUBLE_COMPLEX)
333   else if (ty == MPI_2DOUBLE_COMPLEX)  fprintf(f, "2DOUBLE_COMPLEX");
334#  endif
335#  if defined(MPI_2REAL)
336   else if (ty == MPI_2REAL)            fprintf(f, "2REAL");
337#  endif
338#  if defined(MPI_2DOUBLE_PRECISION)
339   else if (ty == MPI_2DOUBLE_PRECISION) fprintf(f, "2DOUBLE_PRECISION");
340#  endif
341#  if defined(MPI_CHARACTER)
342   else if (ty == MPI_CHARACTER)         fprintf(f, "CHARACTER");
343#  endif
344   else fprintf(f,"showTy:???");
345}
346
347static void showCombiner ( FILE* f, int combiner )
348{
349   switch (combiner) {
350      case MPI_COMBINER_NAMED:       fprintf(f, "NAMED"); break;
351#if   defined(MPI_COMBINER_DUP)
352      case MPI_COMBINER_DUP:         fprintf(f, "DUP"); break;
353#     endif
354      case MPI_COMBINER_CONTIGUOUS:  fprintf(f, "CONTIGUOUS"); break;
355      case MPI_COMBINER_VECTOR:      fprintf(f, "VECTOR"); break;
356#if   defined(MPI_COMBINER_HVECTOR_INTEGER)
357      case MPI_COMBINER_HVECTOR_INTEGER: fprintf(f, "HVECTOR_INTEGER"); break;
358#     endif
359      case MPI_COMBINER_HVECTOR:     fprintf(f, "HVECTOR"); break;
360      case MPI_COMBINER_INDEXED:     fprintf(f, "INDEXED"); break;
361#if   defined(MPI_COMBINER_HINDEXED_INTEGER)
362      case MPI_COMBINER_HINDEXED_INTEGER: fprintf(f, "HINDEXED_INTEGER"); break;
363#     endif
364      case MPI_COMBINER_HINDEXED:    fprintf(f, "HINDEXED"); break;
365#if   defined(MPI_COMBINER_INDEXED_BLOCK)
366      case MPI_COMBINER_INDEXED_BLOCK: fprintf(f, "INDEXED_BLOCK"); break;
367#     endif
368#if   defined(MPI_COMBINER_STRUCT_INTEGER)
369      case MPI_COMBINER_STRUCT_INTEGER: fprintf(f, "STRUCT_INTEGER"); break;
370#     endif
371      case MPI_COMBINER_STRUCT:      fprintf(f, "STRUCT"); break;
372#if   defined(MPI_COMBINER_SUBARRAY)
373      case MPI_COMBINER_SUBARRAY:    fprintf(f, "SUBARRAY"); break;
374#     endif
375#if   defined(MPI_COMBINER_DARRAY)
376      case MPI_COMBINER_DARRAY:      fprintf(f, "DARRAY"); break;
377#     endif
378#if   defined(MPI_COMBINER_F90_REAL)
379      case MPI_COMBINER_F90_REAL:    fprintf(f, "F90_REAL"); break;
380#     endif
381#if   defined(MPI_COMBINER_F90_COMPLEX)
382      case MPI_COMBINER_F90_COMPLEX: fprintf(f, "F90_COMPLEX"); break;
383#     endif
384#if   defined(MPI_COMBINER_F90_INTEGER)
385      case MPI_COMBINER_F90_INTEGER: fprintf(f, "F90_INTEGER"); break;
386#     endif
387#if   defined(MPI_COMBINER_RESIZED)
388      case MPI_COMBINER_RESIZED:     fprintf(f, "RESIZED"); break;
389#     endif
390      default: fprintf(f, "showCombiner:??"); break;
391   }
392}
393
394
395/* ------ Get useful bits of info ------ */
396
397/* Note, PMPI_Comm_rank/size are themselves wrapped.  Should work
398   fine. */
399
400static __inline__ int comm_rank ( MPI_Comm comm )
401{
402   int err, r;
403   err = PMPI_Comm_rank(comm, &r);
404   return err ? 0/*arbitrary*/ : r;
405}
406
407static __inline__ int comm_size ( MPI_Comm comm )
408{
409   int err, r;
410   err = PMPI_Comm_size(comm, &r);
411   return err ? 0/*arbitrary*/ : r;
412}
413
414static __inline__ Bool count_from_Status( /*OUT*/int* recv_count,
415                                      MPI_Datatype datatype,
416                                      MPI_Status* status)
417{
418   int n;
419   int err = PMPI_Get_count(status, datatype, &n);
420   if (err == MPI_SUCCESS) {
421      *recv_count = n;
422      return True;
423   } else {
424      return False;
425   }
426}
427
428/* It's critical that we can do equality on MPI_Requests.
429   Unfortunately these are opaque objects to us (handles, in the
430   parlance of the MPI 1.1 spec).  Fortunately Sec 2.4.1 ("Opaque
431   Objects") specifies that "In C, [...] These [handles] should be
432   types that support assignment and equality operations."  Hence the
433   following function should compile for any compliant definition of
434   MPI_Request. */
435static __inline__
436Bool eq_MPI_Request ( MPI_Request r1, MPI_Request r2 )
437{
438   return r1 == r2;
439}
440
441/* Return True if status is MPI_STATUS_IGNORE or MPI_STATUSES_IGNORE.
442   On MPI-1.x platforms which don't have these symbols (and they would
443   only have them if they've been backported from 2.x) always return
444   False. */
445static __inline__
446Bool isMSI ( MPI_Status* status )
447{
448#  if defined(HAVE_MPI_STATUS_IGNORE)
449   return status == MPI_STATUSES_IGNORE || status == MPI_STATUS_IGNORE;
450#  else
451   return False;
452#  endif
453}
454
455/* Get the 'extent' of a type.  Note, as per the MPI spec this
456   includes whatever padding would be required when using 'ty' in an
457   array. */
458static long extentOfTy ( MPI_Datatype ty )
459{
460   int      r;
461   MPI_Aint n;
462   r = PMPI_Type_extent(ty, &n);
463   assert(r == MPI_SUCCESS);
464   return (long)n;
465}
466
467/* Free up *ty, if it is safe to do so */
468static void maybeFreeTy ( MPI_Datatype* ty )
469{
470   int r, n_ints, n_addrs, n_dtys, tycon;
471
472   r = PMPI_Type_get_envelope( *ty, &n_ints, &n_addrs, &n_dtys, &tycon );
473   assert(r == MPI_SUCCESS);
474
475   /* can't free named types */
476   if (tycon == MPI_COMBINER_NAMED)
477      return;
478
479   /* some kinds of structs are predefined so we can't free them
480      either. */
481   if (*ty == MPI_FLOAT_INT || *ty == MPI_DOUBLE_INT
482       || *ty == MPI_LONG_INT || *ty == MPI_2INT
483       || *ty == MPI_SHORT_INT || *ty == MPI_LONG_DOUBLE_INT)
484      return;
485
486   /* Looks OK - free it. */
487   if (0) {
488      /* show me what you're about to free .. */
489      fprintf(stderr, "freeing combiner ");
490      showCombiner(stderr,tycon);
491      fprintf(stderr, " ty= ");
492      showTy(stderr,*ty);
493      fprintf(stderr,"\n");
494   }
495   r = PMPI_Type_free(ty);
496   assert(r == MPI_SUCCESS);
497}
498
499/* How big is a "named" (base) type?  Returns 0 if not known.  Note.
500   There is a subtlety, which is that this is required to return the
501   exact size of one item of the type, NOT the size of it when padded
502   suitably to make an array of them.  In particular that's why the
503   size of LONG_DOUBLE is computed by looking at the result of doing a
504   long double store, rather than just asking what is the sizeof(long
505   double).
506
507   For LONG_DOUBLE on x86-linux and amd64-linux my impression is that
508   the right answer is 10 even though sizeof(long double) says 12 and
509   16 respectively.  On ppc32-linux it appears to be 16.
510
511   Ref: MPI 1.1 doc p18 */
512static long sizeofOneNamedTy ( MPI_Datatype ty )
513{
514   if (ty == MPI_CHAR)           return sizeof(signed char);
515   if (ty == MPI_SHORT)          return sizeof(signed short int);
516   if (ty == MPI_INT)            return sizeof(signed int);
517   if (ty == MPI_LONG)           return sizeof(signed long int);
518   if (ty == MPI_UNSIGNED_CHAR)  return sizeof(unsigned char);
519   if (ty == MPI_UNSIGNED_SHORT) return sizeof(unsigned short int);
520   if (ty == MPI_UNSIGNED)       return sizeof(unsigned int);
521   if (ty == MPI_UNSIGNED_LONG)  return sizeof(unsigned long int);
522   if (ty == MPI_FLOAT)          return sizeof(float);
523   if (ty == MPI_DOUBLE)         return sizeof(double);
524   if (ty == MPI_BYTE)           return 1;
525   if (ty == MPI_LONG_DOUBLE)    return sizeof_long_double_image();
526   if (ty == MPI_PACKED)         return 1;
527   if (ty == MPI_LONG_LONG_INT)  return sizeof(signed long long int);
528
529#  if defined(MPI_REAL8)
530   if (ty == MPI_REAL8)          return 8; /* MPI2 spec */;
531#  endif
532#  if defined(MPI_REAL4)
533   if (ty == MPI_REAL4)          return 4; /* MPI2 spec */;
534#  endif
535#  if defined(MPI_REAL)
536   if (ty == MPI_REAL)           return 4; /* MPI2 spec */;
537#  endif
538#  if defined(MPI_INTEGER8)
539   if (ty == MPI_INTEGER8)       return 8; /* MPI2 spec */;
540#  endif
541#  if defined(MPI_INTEGER4)
542   if (ty == MPI_INTEGER4)       return 4; /* MPI2 spec */;
543#  endif
544#  if defined(MPI_INTEGER)
545   if (ty == MPI_INTEGER)        return 4; /* MPI2 spec */;
546#  endif
547#  if defined(MPI_DOUBLE_PRECISION)
548   if (ty == MPI_DOUBLE_PRECISION) return 8; /* MPI2 spec */;
549#  endif
550
551   /* new in MPI2: */
552#  if defined(MPI_WCHAR)
553   if (ty == MPI_WCHAR)              return 2; /* MPI2 spec */;
554#  endif
555#  if defined(MPI_SIGNED_CHAR)
556   if (ty == MPI_SIGNED_CHAR)        return 1; /* MPI2 spec */;
557#  endif
558#  if defined(MPI_UNSIGNED_LONG_LONG)
559   if (ty == MPI_UNSIGNED_LONG_LONG) return 8; /* MPI2 spec */;
560#  endif
561#  if defined(MPI_COMPLEX)
562   if (ty == MPI_COMPLEX)            return 2 * 4; /* MPI2 spec */
563#  endif
564#  if defined(MPI_DOUBLE_COMPLEX)
565   if (ty == MPI_DOUBLE_COMPLEX)     return 2 * 8; /* MPI2 spec */
566#  endif
567#  if defined(MPI_LOGICAL)
568   if (ty == MPI_LOGICAL)            return 4; /* MPI2 spec */
569#  endif
570#  if defined(MPI_2INTEGER)
571   if (ty == MPI_2INTEGER)      return 2 * 4; /* undocumented in MPI2 */
572#  endif
573#  if defined(MPI_2COMPLEX)
574   if (ty == MPI_2COMPLEX)      return 2 * 8; /* undocumented in MPI2 */
575#  endif
576#  if defined(MPI_2DOUBLE_COMPLEX)
577   /* 32: this is how openmpi-1.2.2 behaves on x86-linux, but I have
578      really no idea if this is right. */
579   if (ty == MPI_2DOUBLE_COMPLEX)   return 32; /* undocumented in MPI2 */
580#  endif
581#  if defined(MPI_2REAL)
582   if (ty == MPI_2REAL)              return 2 * 4; /* undocumented in MPI2 */
583#  endif
584#  if defined(MPI_2DOUBLE_PRECISION)
585   if (ty == MPI_2DOUBLE_PRECISION)  return 2 * 8; /* undocumented in MPI2 */
586#  endif
587#  if defined(MPI_CHARACTER)
588   if (ty == MPI_CHARACTER)          return 1; /* MPI2 spec */
589#  endif
590
591   /* Note: the following are named structs, not named basic types,
592      and so are not handled here:
593         FLOAT_INT DOUBLE_INT LONG_INT 2INT SHORT_INT LONG_DOUBLE_INT
594      My guess is they are probably for doing max-w-index style
595      reductions, the INT carrying the index of the max/min and the
596      other type its actual value.
597   */
598   return 0;
599}
600
601
602/* Find the size of long double image (not 'sizeof(long double)').
603   See comments in sizeofOneNamedTy.
604*/
605static long sizeof_long_double_image ( void )
606{
607   long i;
608   unsigned char* p;
609   static long cached_result = 0;
610
611   /* Hopefully we have it already. */
612   if (cached_result != 0) {
613      assert(cached_result == 10 || cached_result == 16 || cached_result == 8);
614      return cached_result;
615   }
616
617   /* No?  Then we'll have to compute it.  This isn't thread-safe but
618      it doesn't really matter since all races to compute it should
619      produce the same answer. */
620   p = malloc(64);
621   assert(p);
622   for (i = 0; i < 64; i++)
623      p[i] = 0x55;
624
625   /* Write a value which isn't known at compile time and therefore
626      must come out of a register.  If we just store a constant here,
627      some compilers write more data than a store from a machine
628      register would.  Therefore we have to force a store from a
629      machine register by storing a value which isn't known at compile
630      time.  Since getpid() will return a value < 1 million, turn it
631      into a zero by dividing by 1e+30. */
632   *(long double*)(&p[16]) = (long double)(1.0e-30 * (double)getpid());
633
634   for (i = 0; i < 16; i++) {
635      assert(p[i] == 0x55);
636      assert(p[i+48] == 0x55);
637   }
638   for (i = 16; i <= 48; i++) {
639      if (p[i] == 0x55)
640         break;
641   }
642
643   assert(i < 48);
644   assert(i > 16);
645   free(p);
646   cached_result = i - 16;
647
648   if (0)
649      printf("sizeof_long_double_image: computed %d\n", (int)cached_result);
650
651   assert(cached_result == 10 || cached_result == 16 || cached_result == 8);
652   return cached_result;
653}
654
655
656/*------------------------------------------------------------*/
657/*--- Unpicking datatypes                                  ---*/
658/*------------------------------------------------------------*/
659
660static __inline__
661void walk_type_array ( void(*f)(void*,long), char* base,
662                       MPI_Datatype ty, long count );
663
664
665/* Walk over all fragments of the object of type 'ty' with base
666   address 'base', and apply 'f' to the start/length of each
667   contiguous fragment. */
668static
669void walk_type ( void(*f)(void*,long), char* base, MPI_Datatype ty )
670{
671   int  r, n_ints, n_addrs, n_dtys, tycon;
672   long ex, i;
673   int*          ints  = NULL;
674   MPI_Aint*     addrs = NULL;
675   MPI_Datatype* dtys  = NULL;
676
677   /* Stuff for limiting how much complaining text it spews out */
678   static int complaints = 3;
679   static int last_complained_about_tycon = -987654321; /* presumably bogus */
680
681   if (0)
682      printf("walk_type %p\n", (void*)(unsigned long)ty);
683
684   r = PMPI_Type_get_envelope( ty, &n_ints, &n_addrs, &n_dtys, &tycon );
685   assert(r == MPI_SUCCESS);
686
687   /* Handle the base cases fast(er/ish). */
688   if (tycon == MPI_COMBINER_NAMED) {
689      long sz = sizeofOneNamedTy(ty);
690      if (sz > 0) {
691         f(base, sz);
692         return;
693      }
694      /* Hmm.  Perhaps it's a named struct?  Unfortunately we can't
695         take them to bits so we have to do a really ugly hack, which
696         makes assumptions about how the MPI implementation has laid
697         out these types.  At least Open MPI 1.0.1 appears to put
698         the 'val' field first.  MPICH2 agrees.
699      */
700      if (ty == MPI_2INT) {
701         typedef struct { int val; int loc; } Ty;
702         f(base + offsetof(Ty,val), sizeof(int));
703         f(base + offsetof(Ty,loc), sizeof(int));
704         return;
705      }
706      if (ty == MPI_LONG_INT) {
707         typedef struct { long val; int loc; } Ty;
708         f(base + offsetof(Ty,val), sizeof(long));
709         f(base + offsetof(Ty,loc), sizeof(int));
710         return;
711      }
712      if (ty == MPI_DOUBLE_INT) {
713         typedef struct { double val; int loc; } Ty;
714         f(base + offsetof(Ty,val), sizeof(double));
715         f(base + offsetof(Ty,loc), sizeof(int));
716         return;
717      }
718      if (ty == MPI_SHORT_INT) {
719         typedef struct { short val; int loc; } Ty;
720         f(base + offsetof(Ty,val), sizeof(short));
721         f(base + offsetof(Ty,loc), sizeof(int));
722         return;
723      }
724      if (ty == MPI_FLOAT_INT) {
725         typedef struct { float val; int loc; } Ty;
726         f(base + offsetof(Ty,val), sizeof(float));
727         f(base + offsetof(Ty,loc), sizeof(int));
728         return;
729      }
730      if (ty == MPI_LONG_DOUBLE_INT) {
731         typedef struct { long double val; int loc; } Ty;
732         f(base + offsetof(Ty,val), sizeof_long_double_image());
733         f(base + offsetof(Ty,loc), sizeof(int));
734         return;
735      }
736      if (ty == MPI_LB || ty == MPI_UB)
737         return; /* have zero size, so nothing needs to be done */
738      goto unhandled;
739      /*NOTREACHED*/
740   }
741
742   if (0) {
743      ex = extentOfTy(ty);
744      printf("tycon 0x%llx %d %d %d (ext %d)\n",
745             (unsigned long long int)tycon,
746             n_ints, n_addrs, n_dtys, (int)ex );
747   }
748
749   /* Now safe to do MPI_Type_get_contents */
750   assert(n_ints  >= 0);
751   assert(n_addrs >= 0);
752   assert(n_dtys  >= 0);
753
754   if (n_ints  > 0) {
755      ints = malloc(n_ints * sizeof(int));
756      assert(ints);
757   }
758   if (n_addrs > 0) {
759      addrs = malloc(n_addrs * sizeof(MPI_Aint));
760      assert(addrs);
761   }
762   if (n_dtys  > 0) {
763      dtys = malloc(n_dtys * sizeof(MPI_Datatype));
764      assert(dtys);
765   }
766
767   r = PMPI_Type_get_contents( ty, n_ints, n_addrs, n_dtys,
768                                   ints, addrs, dtys );
769   assert(r == MPI_SUCCESS);
770
771   switch (tycon) {
772
773      case MPI_COMBINER_CONTIGUOUS:
774         assert(n_ints == 1 && n_addrs == 0 && n_dtys == 1);
775	 walk_type_array( f, base, dtys[0], ints[0] );
776         maybeFreeTy( &dtys[0] );
777         break;
778
779      case MPI_COMBINER_VECTOR:
780         assert(n_ints == 3 && n_addrs == 0 && n_dtys == 1);
781         ex = extentOfTy(dtys[0]);
782         if (0)
783         printf("vector count %d x (bl %d stride %d)\n",
784                (int)ints[0], (int)ints[1], (int)ints[2]);
785         for (i = 0; i < ints[0]; i++) {
786            walk_type_array( f, base + i * ints[2]/*stride*/ * ex,
787                                dtys[0], ints[1]/*blocklength*/ );
788         }
789         maybeFreeTy( &dtys[0] );
790         break;
791
792      case MPI_COMBINER_HVECTOR:
793         assert(n_ints == 2 && n_addrs == 1 && n_dtys == 1);
794         ex = extentOfTy(dtys[0]);
795         if (0)
796         printf("hvector count %d x (bl %d hstride %d)\n",
797                (int)ints[0], (int)ints[1], (int)addrs[0]);
798         for (i = 0; i < ints[0]; i++) {
799            walk_type_array( f, base + i * addrs[0]/*hstride*/,
800                                dtys[0], ints[1]/*blocklength*/ );
801         }
802         maybeFreeTy( &dtys[0] );
803         break;
804
805      case MPI_COMBINER_INDEXED:
806         assert(n_addrs == 0 && n_dtys == 1);
807         assert(n_ints > 0);
808         assert(n_ints == 2 * ints[0] + 1);
809         ex = extentOfTy(dtys[0]);
810         for (i = 0; i < ints[0]; i++) {
811            if (0)
812            printf("indexed (elem %d) off %d copies %d\n",
813                   (int)i, ints[i+1+ints[0]], ints[i+1] );
814            walk_type_array( f, base + ex * ints[i+1+ints[0]],
815                                dtys[0], ints[i+1] );
816         }
817         maybeFreeTy( &dtys[0] );
818         break;
819
820      case MPI_COMBINER_HINDEXED:
821         assert(n_ints > 0);
822         assert(n_ints == ints[0] + 1);
823         assert(n_addrs == ints[0] && n_dtys == 1);
824         ex = extentOfTy(dtys[0]);
825         for (i = 0; i < ints[0]; i++) {
826            if (0)
827            printf("hindexed (elem %d) hoff %d copies %d\n",
828                   (int)i, (int)addrs[i], ints[i+1] );
829            walk_type_array( f, base + addrs[i],
830                                dtys[0], ints[i+1] );
831         }
832         maybeFreeTy( &dtys[0] );
833         break;
834
835      case MPI_COMBINER_STRUCT:
836         assert(n_addrs == n_ints-1);
837         assert(n_dtys  == n_ints-1);
838         assert(n_ints > 0);
839         assert(n_ints == ints[0] + 1);
840	 for (i = 0; i < ints[0]; i++) {
841            if (0)
842            printf("struct (elem %d limit %d) hoff %d copies %d\n",
843                   (int)i, (int)ints[0], (int)addrs[i], (int)ints[i+1]);
844            walk_type_array( f, base + addrs[i], dtys[i], (long)ints[i+1] );
845            maybeFreeTy( &dtys[i] );
846	 }
847         break;
848
849      default:
850         goto unhandled;
851
852   }
853
854   /* normal exit */
855   if (ints)  free(ints);
856   if (addrs) free(addrs);
857   if (dtys)  free(dtys);
858   return;
859
860  unhandled:
861   /* Complain, but limit the amount of complaining that can happen to
862      the first 3 different unhandled tycons that show up, so as to
863      avoid swamping users with thousands of duplicate messages. */
864   if (complaints > 0 && tycon != last_complained_about_tycon) {
865      complaints--;
866      last_complained_about_tycon = tycon;
867      if (tycon == MPI_COMBINER_NAMED) {
868         fprintf(stderr, "%s %5d: walk_type: unhandled base type 0x%lx ",
869                         preamble, my_pid, (long)ty);
870         showTy(stderr, ty);
871         fprintf(stderr, "\n");
872      } else {
873         fprintf(stderr, "%s %5d: walk_type: unhandled combiner 0x%lx\n",
874                         preamble, my_pid, (long)tycon);
875      }
876   }
877   if (ints)  free(ints);
878   if (addrs) free(addrs);
879   if (dtys)  free(dtys);
880   if (opt_missing >= 2)
881      barf("walk_type: unhandled combiner, strict checking selected");
882}
883
884
885/* Same as walk_type but apply 'f' to every element in an array of
886   'count' items starting at 'base'.  The only purpose of pushing this
887   into a different routine is so it can attempt to optimise the case
888   where the array elements are contiguous and packed together without
889   holes. */
890static __inline__
891void walk_type_array ( void(*f)(void*,long), char* base,
892                       MPI_Datatype elemTy, long count )
893{
894   long i, ex;
895
896   assert(sizeof(unsigned long) == sizeof(char*));
897
898   /* First see if we can do this the fast way. */
899   ex = sizeofOneNamedTy(elemTy);
900
901   if ( /* ty is a primitive type with power-of-2 size */
902        (ex == 8 || ex == 4 || ex == 2 || ex == 1)
903        && /* base is suitably aligned for ty */
904           ( ((unsigned long)base) & (ex-1)) == 0)  {
905
906      /* We're sure it's contiguous, so just paint/check it in one
907         go. */
908     if (0) printf("walk_type_array fast %ld of size %ld\n", count, ex );
909     f ( base, count * ex );
910
911   } else {
912
913      /* Bad news.  We have to futz with each element individually.
914         This could be very expensive.
915
916         Note: subtle.  If ty is LONG_DOUBLE then the extent will be
917         12, so the following loop will jump along in steps of 12, but
918         the size painted by walk_type will be 10 since it uses
919         sizeofOneNamedTy to establish the size of base types.  Which
920         is what we need to happen. */
921      ex = extentOfTy(elemTy);
922      if (0) printf("walk_type_array SLOW %ld of size %ld\n", count, ex );
923      for (i = 0; i < count; i++)
924         walk_type( f, base + i * ex, elemTy );
925
926   }
927}
928
929
930/* Hook so it's visible from outside (can be handy to dlopen/dlsym
931   it) */
932void mpiwrap_walk_type_EXTERNALLY_VISIBLE
933    ( void(*f)(void*,long), char* base, MPI_Datatype ty )
934{
935   walk_type(f, base, ty);
936}
937
938
939/*------------------------------------------------------------*/
940/*--- Address-range helpers                                ---*/
941/*------------------------------------------------------------*/
942
943/* ----------------
944   Do corresponding checks on memory areas defined using a
945   straightforward (start, length) description.
946   ----------------
947*/
948
949static __inline__
950void check_mem_is_defined_untyped ( void* buffer, long nbytes )
951{
952   if (nbytes > 0) {
953      VALGRIND_CHECK_MEM_IS_DEFINED(buffer, nbytes);
954   }
955}
956
957static __inline__
958void check_mem_is_addressable_untyped ( void* buffer, long nbytes )
959{
960   if (nbytes > 0) {
961      VALGRIND_CHECK_MEM_IS_ADDRESSABLE(buffer, nbytes);
962   }
963}
964
965static __inline__
966void make_mem_defined_if_addressable_untyped ( void* buffer, long nbytes )
967{
968   if (nbytes > 0) {
969      VALGRIND_MAKE_MEM_DEFINED_IF_ADDRESSABLE(buffer, nbytes);
970   }
971}
972
973static __inline__
974void make_mem_defined_if_addressable_if_success_untyped ( int err,
975                                       void* buffer, long nbytes )
976{
977   if (err == MPI_SUCCESS && nbytes > 0) {
978      VALGRIND_MAKE_MEM_DEFINED_IF_ADDRESSABLE(buffer, nbytes);
979   }
980}
981
982
983/* ----------------
984   Do checks on memory areas defined using the MPI (buffer, count,
985   type) convention.
986   ----------------
987*/
988
989/* Check that the specified area is both addressible and contains
990   initialised data, and cause V to complain if not. */
991
992static __inline__
993void check_mem_is_defined ( char* buffer, long count, MPI_Datatype datatype )
994{
995   walk_type_array( check_mem_is_defined_untyped, buffer, datatype, count );
996}
997
998
999/* Check that the specified area is addressible, and cause V to
1000   complain if not. Doesn't matter whether the data there is
1001   initialised or not. */
1002
1003static __inline__
1004void check_mem_is_addressable ( void *buffer, long count, MPI_Datatype datatype )
1005{
1006   walk_type_array( check_mem_is_addressable_untyped, buffer, datatype, count );
1007}
1008
1009
1010/* Set the specified area to 'defined for each byte which is
1011   addressible' state. */
1012
1013static __inline__
1014void make_mem_defined_if_addressable ( void *buffer, int count, MPI_Datatype datatype )
1015{
1016   walk_type_array( make_mem_defined_if_addressable_untyped,
1017                    buffer, datatype, count );
1018}
1019
1020static __inline__
1021void
1022make_mem_defined_if_addressable_if_success ( int err, void *buffer, int count,
1023                                             MPI_Datatype datatype )
1024{
1025   if (err == MPI_SUCCESS)
1026      make_mem_defined_if_addressable(buffer, count, datatype);
1027}
1028
1029
1030/*------------------------------------------------------------*/
1031/*---                                                      ---*/
1032/*--- The wrappers proper.   They are listed in the order  ---*/
1033/*--- in which they appear in "MPI: A Message-Passing      ---*/
1034/*--- Interface Standard, MPIF, Nov 15 2003" (the MPI 1.1  ---*/
1035/*--- spec.  All unimplemented wrappers are listed at the  ---*/
1036/*--- end of the file.  The list of function names is      ---*/
1037/*--- taken from the headers of Open MPI svn r9191.        ---*/
1038/*--- Hopefully it is a complete list of all the MPI 2     ---*/
1039/*--- functions.                                           ---*/
1040/*---                                                      ---*/
1041/*------------------------------------------------------------*/
1042
1043/* Handy abbreviation */
1044#define WRAPPER_FOR(name) I_WRAP_FNNAME_U(name)
1045
1046/* Generates (conceptually) a wrapper which does nothing.  In
1047   fact just generate no wrapper at all. */
1048#define HAS_NO_WRAPPER(basename) /* */
1049
1050
1051/*------------------------------------------------------------*/
1052/*---                                                      ---*/
1053/*--- Sec 3.2, Blocking Send and Receive Operations        ---*/
1054/*---                                                      ---*/
1055/*------------------------------------------------------------*/
1056
1057/* --- {,B,S,R}Send --- */
1058/* pre: rd: (buf,count,datatype) */
1059static
1060int generic_Send(void *buf, int count, MPI_Datatype datatype,
1061                            int dest, int tag, MPI_Comm comm)
1062{
1063   OrigFn fn;
1064   int    err;
1065   VALGRIND_GET_ORIG_FN(fn);
1066   before("{,B,S,R}Send");
1067   check_mem_is_defined(buf, count, datatype);
1068   CALL_FN_W_6W(err, fn, buf,count,datatype,dest,tag,comm);
1069   after("{,B,S,R}Send", err);
1070   return err;
1071}
1072int WRAPPER_FOR(PMPI_Send)(void *buf, int count, MPI_Datatype datatype,
1073                           int dest, int tag, MPI_Comm comm) {
1074   return generic_Send(buf,count,datatype, dest,tag,comm);
1075}
1076int WRAPPER_FOR(PMPI_Bsend)(void *buf, int count, MPI_Datatype datatype,
1077                            int dest, int tag, MPI_Comm comm) {
1078   return generic_Send(buf,count,datatype, dest,tag,comm);
1079}
1080int WRAPPER_FOR(PMPI_Ssend)(void *buf, int count, MPI_Datatype datatype,
1081                            int dest, int tag, MPI_Comm comm) {
1082   return generic_Send(buf,count,datatype, dest,tag,comm);
1083}
1084int WRAPPER_FOR(PMPI_Rsend)(void *buf, int count, MPI_Datatype datatype,
1085                            int dest, int tag, MPI_Comm comm) {
1086   return generic_Send(buf,count,datatype, dest,tag,comm);
1087}
1088
1089/* --- Recv --- */
1090/* pre:  must be writable: (buf,count,datatype)
1091         must be writable: status
1092   post: make readable: (buf,recv_count,datatype)
1093         where recv_count is determined from *status
1094*/
1095int WRAPPER_FOR(PMPI_Recv)(void *buf, int count, MPI_Datatype datatype,
1096                           int source, int tag,
1097                           MPI_Comm comm, MPI_Status *status)
1098{
1099   OrigFn     fn;
1100   int        err, recv_count = 0;
1101   MPI_Status fake_status;
1102   VALGRIND_GET_ORIG_FN(fn);
1103   before("Recv");
1104   if (isMSI(status))
1105      status = &fake_status;
1106   check_mem_is_addressable(buf, count, datatype);
1107   check_mem_is_addressable_untyped(status, sizeof(*status));
1108   CALL_FN_W_7W(err, fn, buf,count,datatype,source,tag,comm,status);
1109   if (err == MPI_SUCCESS && count_from_Status(&recv_count,datatype,status)) {
1110      make_mem_defined_if_addressable(buf, recv_count, datatype);
1111   }
1112   after("Recv", err);
1113   return err;
1114}
1115
1116/* --- Get_count --- */
1117/* pre:  must be readable: *status
1118   post: make defined: *count -- don't bother, libmpi will surely do this
1119*/
1120int WRAPPER_FOR(PMPI_Get_count)(MPI_Status* status,
1121                                MPI_Datatype ty, int* count )
1122{
1123   OrigFn fn;
1124   int    err;
1125   VALGRIND_GET_ORIG_FN(fn);
1126   before("Get_count");
1127#  if defined(_AIX)
1128   check_mem_is_addressable_untyped(status, sizeof(*status));
1129#  else
1130   check_mem_is_defined_untyped(status, sizeof(*status));
1131#  endif
1132   CALL_FN_W_WWW(err, fn, status,ty,count);
1133   after("Get_count", err);
1134   return err;
1135}
1136
1137
1138/*------------------------------------------------------------*/
1139/*---                                                      ---*/
1140/*--- Sec 3.7, Nonblocking communication                   ---*/
1141/*---                                                      ---*/
1142/*------------------------------------------------------------*/
1143
1144/* Maintain a table that makes it possible for the wrappers to
1145   complete MPI_Irecv successfully.
1146
1147   The issue is that MPI_Irecv states the recv buffer and returns
1148   immediately, giving a handle (MPI_Request) for the transaction.
1149   Later the user will have to poll for completion with MPI_Wait etc,
1150   and at that point these wrappers have to paint the recv buffer.
1151   But the recv buffer details are not presented to MPI_Wait - only
1152   the handle is.  We therefore have to use a shadow table
1153   (sReqs{,_size,_used,_lock}) which associates uncompleted
1154   MPI_Requests with the corresponding buffer address/count/type.
1155
1156   Only read requests are placed in the table, since there is no need
1157   to do any buffer painting following completion of an Isend - all
1158   the checks for that are done at the time Isend is called.
1159
1160   Care has to be take to remove completed requests from the table.
1161
1162   Access to the table is guarded by sReqs_lock so as to make it
1163   thread-safe.
1164*/
1165
1166typedef
1167   struct {
1168      Bool         inUse;
1169      MPI_Request  key;
1170      void*        buf;
1171      int          count;
1172      MPI_Datatype datatype;
1173   }
1174   ShadowRequest;
1175
1176static ShadowRequest*  sReqs      = NULL;
1177static int             sReqs_size = 0;
1178static int             sReqs_used = 0;
1179static pthread_mutex_t sReqs_lock = PTHREAD_MUTEX_INITIALIZER;
1180
1181#define LOCK_SREQS                                  \
1182  do { int pr = pthread_mutex_lock(&sReqs_lock);    \
1183       assert(pr == 0);                             \
1184  } while (0)
1185
1186#define UNLOCK_SREQS                                \
1187  do { int pr = pthread_mutex_unlock(&sReqs_lock);  \
1188       assert(pr == 0);                             \
1189  } while (0)
1190
1191
1192/* Ensure the sReqs expandable array has at least one free slot, by
1193   copying it into a larger one if necessary.  NOTE: sReqs_lock is
1194   held throughout this procedure.*/
1195static void ensure_sReq_space ( void )
1196{
1197   int            i;
1198   ShadowRequest* sReqs2;
1199   if (sReqs_used == sReqs_size) {
1200      sReqs_size = sReqs_size==0 ? 2 : 2*sReqs_size;
1201      sReqs2 = malloc( sReqs_size * sizeof(ShadowRequest) );
1202      if (sReqs2 == NULL) {
1203         UNLOCK_SREQS;
1204         barf("add_shadow_Request: malloc failed.\n");
1205      }
1206      for (i = 0; i < sReqs_used; i++)
1207         sReqs2[i] = sReqs[i];
1208      if (sReqs)
1209         free(sReqs);
1210      sReqs = sReqs2;
1211   }
1212   assert(sReqs_used < sReqs_size);
1213}
1214
1215
1216/* Find shadow info for 'request', or NULL if none. */
1217
1218static
1219ShadowRequest* find_shadow_Request ( MPI_Request request )
1220{
1221   ShadowRequest* ret = NULL;
1222   int i;
1223   LOCK_SREQS;
1224   for (i = 0; i < sReqs_used; i++) {
1225      if (sReqs[i].inUse && eq_MPI_Request(sReqs[i].key,request)) {
1226         ret = &sReqs[i];
1227         break;
1228      }
1229   }
1230   UNLOCK_SREQS;
1231   return ret;
1232}
1233
1234
1235/* Delete shadow info for 'request', if any. */
1236
1237static void delete_shadow_Request ( MPI_Request request )
1238{
1239   int i;
1240   LOCK_SREQS;
1241   for (i = 0; i < sReqs_used; i++) {
1242      if (sReqs[i].inUse && eq_MPI_Request(sReqs[i].key,request)) {
1243         sReqs[i].inUse = False;
1244         break;
1245      }
1246   }
1247   UNLOCK_SREQS;
1248}
1249
1250
1251/* Add a shadow for 'request', overwriting any old binding for it. */
1252
1253static
1254void add_shadow_Request( MPI_Request request,
1255                         void* buf, int count,
1256                         MPI_Datatype datatype )
1257{
1258   int i, ix = -1;
1259   LOCK_SREQS;
1260   assert(sReqs_used >= 0);
1261   assert(sReqs_size >= 0);
1262   assert(sReqs_used <= sReqs_size);
1263   if (sReqs == NULL) assert(sReqs_size == 0);
1264
1265   /* First of all see if we already have a binding for this key; if
1266      so just replace it, and have done. */
1267   for (i = 0; i < sReqs_used; i++) {
1268      if (sReqs[i].inUse && eq_MPI_Request(sReqs[i].key,request)) {
1269         ix = i;
1270         break;
1271      }
1272   }
1273
1274   if (ix < 0) {
1275      /* Ok, we don't have it, so will have to add it.  First search
1276         to see if there is an existing empty slot. */
1277      for (i = 0; i < sReqs_used; i++) {
1278         if (!sReqs[i].inUse) {
1279            ix = i;
1280            break;
1281         }
1282      }
1283   }
1284
1285   /* No empty slots.  Allocate a new one. */
1286   if (ix < 0) {
1287      ensure_sReq_space();
1288      assert(sReqs_used < sReqs_size);
1289      ix = sReqs_used;
1290      sReqs_used++;
1291   }
1292
1293   assert(ix >= 0 && ix < sReqs_used);
1294   assert(sReqs_used <= sReqs_size);
1295
1296   sReqs[ix].inUse    = True;
1297   sReqs[ix].key      = request;
1298   sReqs[ix].buf      = buf;
1299   sReqs[ix].count    = count;
1300   sReqs[ix].datatype = datatype;
1301
1302   UNLOCK_SREQS;
1303   if (opt_verbosity > 1)
1304      fprintf(stderr, "%s %5d: sReq+ 0x%lx -> b/c/d %p/%d/0x%lx [slot %d]\n",
1305                      preamble, my_pid, (unsigned long)request,
1306                                buf, count, (long)datatype, ix);
1307}
1308
1309static
1310MPI_Request* clone_Request_array ( int count, MPI_Request* orig )
1311{
1312   MPI_Request* copy;
1313   int i;
1314   LOCK_SREQS;
1315   if (count < 0)
1316      count = 0; /* Hmm.  Call Mulder and Scully. */
1317   copy = malloc( count * sizeof(MPI_Request) );
1318   if (copy == NULL && count > 0) {
1319      UNLOCK_SREQS;
1320      barf("clone_Request_array: malloc failed");
1321   }
1322   for (i = 0; i < count; i++)
1323      copy[i] = orig[i];
1324   UNLOCK_SREQS;
1325   return copy;
1326}
1327
1328#undef LOCK_SREQS
1329#undef UNLOCK_SREQS
1330
1331
1332static void maybe_complete ( Bool         error_in_status,
1333                             MPI_Request  request_before,
1334                             MPI_Request  request_after,
1335                             MPI_Status*  status )
1336{
1337   int recv_count = 0;
1338   ShadowRequest* shadow;
1339   /* How do we know if this is an Irecv request that has now
1340      finished successfully?
1341
1342      request_before isn't MPI_REQUEST_NULL
1343      and request_before is found in the shadow table
1344      and request_after *is* MPI_REQUEST_NULL
1345      and (if error_in_status then status.MPI_ERROR is MPI_SUCCESS)
1346
1347      (when error_in_status == False, then we expect not to get
1348      called at all if there was an error.)
1349   */
1350   if (request_before != MPI_REQUEST_NULL
1351       && request_after == MPI_REQUEST_NULL
1352       && (error_in_status ? status->MPI_ERROR == MPI_SUCCESS : True)
1353       && ( (shadow=find_shadow_Request(request_before)) != NULL) ) {
1354      /* The Irecv detailed in 'shadow' completed.  Paint the result
1355         buffer, and delete the entry. */
1356      if (count_from_Status(&recv_count, shadow->datatype, status)) {
1357         make_mem_defined_if_addressable(shadow->buf, recv_count, shadow->datatype);
1358         if (opt_verbosity > 1)
1359            fprintf(stderr, "%s %5d: sReq- %p (completed)\n",
1360                            preamble, my_pid, request_before);
1361      }
1362      delete_shadow_Request(request_before);
1363   }
1364}
1365
1366
1367/* --- Isend --- */
1368/* rd: (buf,count,datatype) */
1369/* wr: *request */
1370static __inline__
1371int generic_Isend(void *buf, int count, MPI_Datatype datatype,
1372                             int dest, int tag, MPI_Comm comm,
1373                             MPI_Request* request)
1374{
1375   OrigFn fn;
1376   int    err;
1377   VALGRIND_GET_ORIG_FN(fn);
1378   before("{,B,S,R}Isend");
1379   check_mem_is_defined(buf, count, datatype);
1380   check_mem_is_addressable_untyped(request, sizeof(*request));
1381   CALL_FN_W_7W(err, fn, buf,count,datatype,dest,tag,comm,request);
1382   make_mem_defined_if_addressable_if_success_untyped(err, request, sizeof(*request));
1383   after("{,B,S,R}Isend", err);
1384   return err;
1385}
1386int WRAPPER_FOR(PMPI_Isend)(void *buf, int count, MPI_Datatype datatype,
1387                            int dest, int tag, MPI_Comm comm,
1388                            MPI_Request* request) {
1389   return generic_Isend(buf,count,datatype, dest,tag,comm, request);
1390}
1391int WRAPPER_FOR(PMPI_Ibsend)(void *buf, int count, MPI_Datatype datatype,
1392                             int dest, int tag, MPI_Comm comm,
1393                             MPI_Request* request) {
1394   return generic_Isend(buf,count,datatype, dest,tag,comm, request);
1395}
1396int WRAPPER_FOR(PMPI_Issend)(void *buf, int count, MPI_Datatype datatype,
1397                             int dest, int tag, MPI_Comm comm,
1398                             MPI_Request* request) {
1399   return generic_Isend(buf,count,datatype, dest,tag,comm, request);
1400}
1401int WRAPPER_FOR(PMPI_Irsend)(void *buf, int count, MPI_Datatype datatype,
1402                             int dest, int tag, MPI_Comm comm,
1403                             MPI_Request* request) {
1404   return generic_Isend(buf,count,datatype, dest,tag,comm, request);
1405}
1406
1407
1408/* --- Irecv --- */
1409/* pre:  must be writable: (buf,count,datatype), *request
1410   post: make readable *request
1411         add a request->(buf,count,ty) binding to the
1412         shadow request table.
1413*/
1414int WRAPPER_FOR(PMPI_Irecv)( void* buf, int count, MPI_Datatype datatype,
1415                             int source, int tag, MPI_Comm comm,
1416                             MPI_Request* request )
1417{
1418   OrigFn fn;
1419   int    err;
1420   VALGRIND_GET_ORIG_FN(fn);
1421   before("Irecv");
1422   check_mem_is_addressable(buf, count, datatype);
1423   check_mem_is_addressable_untyped(request, sizeof(*request));
1424   CALL_FN_W_7W(err, fn, buf,count,datatype,source,tag,comm,request);
1425   if (err == MPI_SUCCESS) {
1426      make_mem_defined_if_addressable_untyped(request, sizeof(*request));
1427      add_shadow_Request( *request, buf,count,datatype );
1428   }
1429   after("Irecv", err);
1430   return err;
1431}
1432
1433/* --- Wait --- */
1434/* The MPI1 spec (imprecisely) defines 3 request states:
1435   - "null"     if the request is MPI_REQUEST_NULL
1436   - "inactive" if not "null" and not associated with ongoing comms
1437   - "active"   if not "null" and is associated with ongoing comms
1438*/
1439int WRAPPER_FOR(PMPI_Wait)( MPI_Request* request,
1440                            MPI_Status* status )
1441{
1442   MPI_Request  request_before;
1443   MPI_Status   fake_status;
1444   OrigFn       fn;
1445   int          err;
1446   VALGRIND_GET_ORIG_FN(fn);
1447   before("Wait");
1448   if (isMSI(status))
1449      status = &fake_status;
1450   check_mem_is_addressable_untyped(status, sizeof(MPI_Status));
1451   check_mem_is_defined_untyped(request, sizeof(MPI_Request));
1452   request_before = *request;
1453   CALL_FN_W_WW(err, fn, request,status);
1454   if (err == MPI_SUCCESS) {
1455      maybe_complete(False/*err in status?*/,
1456                     request_before, *request, status);
1457      make_mem_defined_if_addressable_untyped(status, sizeof(MPI_Status));
1458   }
1459   after("Wait", err);
1460   return err;
1461}
1462
1463/* --- Waitany --- */
1464int WRAPPER_FOR(PMPI_Waitany)( int count,
1465                               MPI_Request* requests,
1466                               int* index,
1467                               MPI_Status* status )
1468{
1469   MPI_Request* requests_before = NULL;
1470   MPI_Status   fake_status;
1471   OrigFn       fn;
1472   int          err, i;
1473   VALGRIND_GET_ORIG_FN(fn);
1474   before("Waitany");
1475   if (isMSI(status))
1476      status = &fake_status;
1477   if (0) fprintf(stderr, "Waitany: %d\n", count);
1478   check_mem_is_addressable_untyped(index, sizeof(int));
1479   check_mem_is_addressable_untyped(status, sizeof(MPI_Status));
1480   for (i = 0; i < count; i++) {
1481      check_mem_is_defined_untyped(&requests[i], sizeof(MPI_Request));
1482   }
1483   requests_before = clone_Request_array( count, requests );
1484   CALL_FN_W_WWWW(err, fn, count,requests,index,status);
1485   if (err == MPI_SUCCESS && *index >= 0 && *index < count) {
1486      maybe_complete(False/*err in status?*/,
1487                     requests_before[*index], requests[*index], status);
1488      make_mem_defined_if_addressable_untyped(status, sizeof(MPI_Status));
1489   }
1490   if (requests_before)
1491      free(requests_before);
1492   after("Waitany", err);
1493   return err;
1494}
1495
1496/* --- Waitall --- */
1497int WRAPPER_FOR(PMPI_Waitall)( int count,
1498                               MPI_Request* requests,
1499                               MPI_Status* statuses )
1500{
1501   MPI_Request* requests_before = NULL;
1502   OrigFn       fn;
1503   int          err, i;
1504   Bool         free_sta = False;
1505   VALGRIND_GET_ORIG_FN(fn);
1506   before("Waitall");
1507   if (0) fprintf(stderr, "Waitall: %d\n", count);
1508   if (isMSI(statuses)) {
1509      free_sta = True;
1510      statuses = malloc( (count < 0 ? 0 : count) * sizeof(MPI_Status) );
1511   }
1512   for (i = 0; i < count; i++) {
1513      check_mem_is_addressable_untyped(&statuses[i], sizeof(MPI_Status));
1514      check_mem_is_defined_untyped(&requests[i], sizeof(MPI_Request));
1515   }
1516   requests_before = clone_Request_array( count, requests );
1517   CALL_FN_W_WWW(err, fn, count,requests,statuses);
1518   if (err == MPI_SUCCESS /*complete success*/
1519       || err == MPI_ERR_IN_STATUS /* partial success */) {
1520      Bool e_i_s = err == MPI_ERR_IN_STATUS;
1521      for (i = 0; i < count; i++) {
1522         maybe_complete(e_i_s, requests_before[i], requests[i],
1523                               &statuses[i]);
1524         make_mem_defined_if_addressable_untyped(&statuses[i],
1525                                                 sizeof(MPI_Status));
1526      }
1527   }
1528   if (requests_before)
1529      free(requests_before);
1530   if (free_sta)
1531      free(statuses);
1532   after("Waitall", err);
1533   return err;
1534}
1535
1536/* --- Test --- */
1537/* nonblocking version of Wait */
1538int WRAPPER_FOR(PMPI_Test)( MPI_Request* request, int* flag,
1539                            MPI_Status* status )
1540{
1541   MPI_Request  request_before;
1542   MPI_Status   fake_status;
1543   OrigFn       fn;
1544   int          err;
1545   VALGRIND_GET_ORIG_FN(fn);
1546   before("Test");
1547   if (isMSI(status))
1548      status = &fake_status;
1549   check_mem_is_addressable_untyped(status, sizeof(MPI_Status));
1550   check_mem_is_addressable_untyped(flag, sizeof(int));
1551   check_mem_is_defined_untyped(request, sizeof(MPI_Request));
1552   request_before = *request;
1553   CALL_FN_W_WWW(err, fn, request,flag,status);
1554   if (err == MPI_SUCCESS && *flag) {
1555      maybe_complete(False/*err in status?*/,
1556                     request_before, *request, status);
1557      make_mem_defined_if_addressable_untyped(status, sizeof(MPI_Status));
1558   }
1559   after("Test", err);
1560   return err;
1561}
1562
1563/* --- Testall --- */
1564/* nonblocking version of Waitall */
1565int WRAPPER_FOR(PMPI_Testall)( int count, MPI_Request* requests,
1566                               int* flag, MPI_Status* statuses )
1567{
1568   MPI_Request* requests_before = NULL;
1569   OrigFn       fn;
1570   int          err, i;
1571   Bool         free_sta = False;
1572   VALGRIND_GET_ORIG_FN(fn);
1573   before("Testall");
1574   if (0) fprintf(stderr, "Testall: %d\n", count);
1575   if (isMSI(statuses)) {
1576      free_sta = True;
1577      statuses = malloc( (count < 0 ? 0 : count) * sizeof(MPI_Status) );
1578   }
1579   check_mem_is_addressable_untyped(flag, sizeof(int));
1580   for (i = 0; i < count; i++) {
1581      check_mem_is_addressable_untyped(&statuses[i], sizeof(MPI_Status));
1582      check_mem_is_defined_untyped(&requests[i], sizeof(MPI_Request));
1583   }
1584   requests_before = clone_Request_array( count, requests );
1585   CALL_FN_W_WWWW(err, fn, count,requests,flag,statuses);
1586   /* Urk.  Is the following "if (...)" really right?  I don't know. */
1587   if (*flag
1588       && (err == MPI_SUCCESS /*complete success*/
1589           || err == MPI_ERR_IN_STATUS /* partial success */)) {
1590      Bool e_i_s = err == MPI_ERR_IN_STATUS;
1591      for (i = 0; i < count; i++) {
1592         maybe_complete(e_i_s, requests_before[i], requests[i],
1593                               &statuses[i]);
1594         make_mem_defined_if_addressable_untyped(&statuses[i],
1595                                                 sizeof(MPI_Status));
1596      }
1597   }
1598   if (requests_before)
1599      free(requests_before);
1600   if (free_sta)
1601      free(statuses);
1602   after("Testall", err);
1603   return err;
1604}
1605
1606/* --- Iprobe --- */
1607/* pre:  must-be-writable: *flag, *status */
1608/* post: make-readable *flag
1609         if *flag==True  make-defined *status */
1610int WRAPPER_FOR(PMPI_Iprobe)(int source, int tag,
1611                             MPI_Comm comm,
1612                             int* flag, MPI_Status* status)
1613{
1614   MPI_Status fake_status;
1615   OrigFn     fn;
1616   int        err;
1617   VALGRIND_GET_ORIG_FN(fn);
1618   before("Iprobe");
1619   if (isMSI(status))
1620      status = &fake_status;
1621   check_mem_is_addressable_untyped(flag, sizeof(*flag));
1622   check_mem_is_addressable_untyped(status, sizeof(*status));
1623   CALL_FN_W_5W(err, fn, source,tag,comm,flag,status);
1624   if (err == MPI_SUCCESS) {
1625      make_mem_defined_if_addressable_untyped(flag, sizeof(*flag));
1626      if (*flag)
1627         make_mem_defined_if_addressable_untyped(status, sizeof(*status));
1628   }
1629   after("Iprobe", err);
1630   return err;
1631}
1632
1633/* --- Probe --- */
1634/* pre:  must-be-writable *status */
1635/* post: make-defined *status */
1636int WRAPPER_FOR(PMPI_Probe)(int source, int tag,
1637                            MPI_Comm comm, MPI_Status* status)
1638{
1639   MPI_Status fake_status;
1640   OrigFn     fn;
1641   int        err;
1642   VALGRIND_GET_ORIG_FN(fn);
1643   before("Probe");
1644   if (isMSI(status))
1645      status = &fake_status;
1646   check_mem_is_addressable_untyped(status, sizeof(*status));
1647   CALL_FN_W_WWWW(err, fn, source,tag,comm,status);
1648   make_mem_defined_if_addressable_if_success_untyped(err, status, sizeof(*status));
1649   after("Probe", err);
1650   return err;
1651}
1652
1653/* --- Cancel --- */
1654/* Wrapping PMPI_Cancel is interesting only to the extent that we need
1655   to be able to detect when a request should be removed from our
1656   shadow table due to cancellation. */
1657int WRAPPER_FOR(PMPI_Cancel)(MPI_Request* request)
1658{
1659   OrigFn      fn;
1660   int         err;
1661   MPI_Request tmp;
1662   VALGRIND_GET_ORIG_FN(fn);
1663   before("Cancel");
1664   check_mem_is_addressable_untyped(request, sizeof(*request));
1665   tmp = *request;
1666   CALL_FN_W_W(err, fn, request);
1667   if (err == MPI_SUCCESS)
1668      delete_shadow_Request(tmp);
1669   after("Cancel", err);
1670   return err;
1671}
1672
1673
1674/*------------------------------------------------------------*/
1675/*---                                                      ---*/
1676/*--- Sec 3.10, Send-receive                               ---*/
1677/*---                                                      ---*/
1678/*------------------------------------------------------------*/
1679
1680/* --- Sendrecv --- */
1681/* pre: must be readable: (sendbuf,sendcount,sendtype)
1682        must be writable: (recvbuf,recvcount,recvtype)
1683   post: make readable: (recvbuf,recvcount_actual,datatype)
1684         where recvcount_actual is determined from *status
1685*/
1686int WRAPPER_FOR(PMPI_Sendrecv)(
1687       void *sendbuf, int sendcount, MPI_Datatype sendtype,
1688       int dest, int sendtag,
1689       void *recvbuf, int recvcount, MPI_Datatype recvtype,
1690       int source, int recvtag,
1691       MPI_Comm comm,  MPI_Status *status)
1692{
1693   MPI_Status fake_status;
1694   OrigFn     fn;
1695   int        err, recvcount_actual = 0;
1696   VALGRIND_GET_ORIG_FN(fn);
1697   before("Sendrecv");
1698   if (isMSI(status))
1699      status = &fake_status;
1700   check_mem_is_defined(sendbuf, sendcount, sendtype);
1701   check_mem_is_addressable(recvbuf, recvcount, recvtype);
1702   check_mem_is_addressable_untyped(status, sizeof(*status));
1703   CALL_FN_W_12W(err, fn, sendbuf,sendcount,sendtype,dest,sendtag,
1704                          recvbuf,recvcount,recvtype,source,recvtag,
1705                          comm,status);
1706   if (err == MPI_SUCCESS
1707       && count_from_Status(&recvcount_actual,recvtype,status)) {
1708      make_mem_defined_if_addressable(recvbuf, recvcount_actual, recvtype);
1709   }
1710   after("Sendrecv", err);
1711   return err;
1712}
1713
1714
1715/*------------------------------------------------------------*/
1716/*---                                                      ---*/
1717/*--- Sec 3.12, Derived datatypes                          ---*/
1718/*---                                                      ---*/
1719/*------------------------------------------------------------*/
1720
1721/* --- Address --- */
1722/* Does this have anything worth checking? */
1723HAS_NO_WRAPPER(Address)
1724
1725/* --- MPI 2 stuff --- */
1726/* Type_extent, Type_get_contents and Type_get_envelope sometimes get
1727   used intensively by the type walker (walk_type).  There's no reason
1728   why they couldn't be properly wrapped if needed, but doing so slows
1729   everything down, so don't bother until needed. */
1730HAS_NO_WRAPPER(Type_extent)
1731HAS_NO_WRAPPER(Type_get_contents)
1732HAS_NO_WRAPPER(Type_get_envelope)
1733
1734/* --- Type_commit --- */
1735int WRAPPER_FOR(PMPI_Type_commit)( MPI_Datatype* ty )
1736{
1737   OrigFn fn;
1738   int    err;
1739   VALGRIND_GET_ORIG_FN(fn);
1740   before("Type_commit");
1741   check_mem_is_defined_untyped(ty, sizeof(*ty));
1742   CALL_FN_W_W(err, fn, ty);
1743   after("Type_commit", err);
1744   return err;
1745}
1746
1747/* --- Type_free --- */
1748int WRAPPER_FOR(PMPI_Type_free)( MPI_Datatype* ty )
1749{
1750   OrigFn fn;
1751   int    err;
1752   VALGRIND_GET_ORIG_FN(fn);
1753   before("Type_free");
1754   check_mem_is_defined_untyped(ty, sizeof(*ty));
1755   CALL_FN_W_W(err, fn, ty);
1756   after("Type_free", err);
1757   return err;
1758}
1759
1760
1761/*------------------------------------------------------------*/
1762/*---                                                      ---*/
1763/*--- Sec 3.13, Pack and unpack                            ---*/
1764/*---                                                      ---*/
1765/*------------------------------------------------------------*/
1766
1767/* --- Pack --- */
1768/* pre: must be readable: position
1769        must be readable: (inbuf,incount,datatype)
1770        must be writable: outbuf[0 .. outsize-1]
1771        must be writable: outbuf[*position ..
1772                                 *position - 1
1773                                 + however much space PMPI_Pack_size
1774                                   says we will need]
1775   post: make readable: outbuf[old *position .. new *position]
1776*/
1777int WRAPPER_FOR(PMPI_Pack)( void* inbuf, int incount, MPI_Datatype datatype,
1778                            void* outbuf, int outsize,
1779                            int* position, MPI_Comm comm )
1780{
1781   OrigFn fn;
1782   int    err, szB = 0;
1783   int    position_ORIG = *position;
1784   VALGRIND_GET_ORIG_FN(fn);
1785   before("Pack");
1786   /* stay sane */
1787   check_mem_is_defined_untyped(position, sizeof(*position));
1788   /* check input */
1789   check_mem_is_defined(inbuf, incount, datatype);
1790   /* check output area's stated bounds make sense */
1791   check_mem_is_addressable_untyped(outbuf, outsize);
1792   /* check output area's actual used size properly */
1793   err = PMPI_Pack_size( incount, datatype, comm, &szB );
1794   if (err == MPI_SUCCESS && szB > 0) {
1795      check_mem_is_addressable_untyped(
1796         ((char*)outbuf) + position_ORIG, szB
1797      );
1798   }
1799
1800   CALL_FN_W_7W(err, fn, inbuf,incount,datatype, outbuf,outsize,position, comm);
1801
1802   if (err == MPI_SUCCESS && (*position) > position_ORIG) {
1803      /* paint output */
1804      make_mem_defined_if_addressable_untyped(
1805         ((char*)outbuf) + position_ORIG, *position - position_ORIG
1806      );
1807   }
1808   after("Pack", err);
1809   return err;
1810}
1811
1812/* --- Unpack --- */
1813/* pre: must be readable: position
1814        must be writable: (outbuf,outcount,datatype)
1815        must be writable: outbuf[0 .. outsize-1]
1816        must be writable: outbuf[*position ..
1817                                 *position - 1
1818                                 + however much space PMPI_Pack_size
1819                                   says we will need]
1820   post: make readable: (outbuf,outcount,datatype)
1821         and also do a readability check of
1822         inbuf[old *position .. new *position]
1823*/
1824int WRAPPER_FOR(PMPI_Unpack)( void* inbuf, int insize, int* position,
1825                              void* outbuf, int outcount, MPI_Datatype datatype,
1826                              MPI_Comm comm )
1827{
1828   OrigFn fn;
1829   int    err, szB = 0;
1830   int    position_ORIG = *position;
1831   VALGRIND_GET_ORIG_FN(fn);
1832   before("Unpack");
1833   /* stay sane */
1834   check_mem_is_defined_untyped(position, sizeof(*position));
1835   /* check output area is accessible */
1836   check_mem_is_addressable(outbuf, outcount, datatype);
1837   /* check input area's stated bounds make sense */
1838   check_mem_is_addressable_untyped(inbuf, insize);
1839   /* check input area's actual used size properly */
1840   err = PMPI_Pack_size( outcount, datatype, comm, &szB );
1841   if (err == MPI_SUCCESS && szB > 0) {
1842      check_mem_is_addressable_untyped(
1843         ((char*)inbuf) + position_ORIG, szB
1844      );
1845   }
1846
1847   CALL_FN_W_7W(err, fn, inbuf,insize,position, outbuf,outcount,datatype, comm);
1848
1849   if (err == MPI_SUCCESS && (*position) > position_ORIG) {
1850      /* recheck input more carefully */
1851      check_mem_is_defined_untyped(
1852         ((char*)inbuf) + position_ORIG, *position - position_ORIG
1853      );
1854      /* paint output */
1855      make_mem_defined_if_addressable( outbuf, outcount, datatype );
1856   }
1857   after("Unpack", err);
1858   return err;
1859}
1860
1861
1862/*------------------------------------------------------------*/
1863/*---                                                      ---*/
1864/*--- Sec 4.4, Broadcast                                   ---*/
1865/*---                                                      ---*/
1866/*------------------------------------------------------------*/
1867
1868/* --- Bcast --- */
1869/* pre:  must-be-readable (buffer,count,datatype) for rank==root
1870         must-be-writable (buffer,count,datatype) for rank!=root
1871   post: make-readable (buffer,count,datatype) for all
1872
1873   Resulting behaviour is: if root sends uninitialised stuff, then
1874   V complains, but then all ranks, including itself, see the buffer
1875   as initialised after that.
1876*/
1877int WRAPPER_FOR(PMPI_Bcast)(void *buffer, int count,
1878                            MPI_Datatype datatype,
1879                            int root, MPI_Comm comm)
1880{
1881   OrigFn fn;
1882   int    err;
1883   Bool  i_am_sender;
1884   VALGRIND_GET_ORIG_FN(fn);
1885   before("Bcast");
1886   i_am_sender = root == comm_rank(comm);
1887   if (i_am_sender) {
1888      check_mem_is_defined(buffer, count, datatype);
1889   } else {
1890      check_mem_is_addressable(buffer, count, datatype);
1891   }
1892   CALL_FN_W_5W(err, fn, buffer,count,datatype,root,comm);
1893   make_mem_defined_if_addressable_if_success(err, buffer, count, datatype);
1894   after("Bcast", err);
1895   return err;
1896}
1897
1898
1899/*------------------------------------------------------------*/
1900/*---                                                      ---*/
1901/*--- Sec 4.5, Gather                                      ---*/
1902/*---                                                      ---*/
1903/*------------------------------------------------------------*/
1904
1905/* --- Gather --- */
1906/* JRS 20060217: I don't really understand this.  Each process is
1907   going to send sendcount items of type sendtype to the root.  So
1908   the root is going to receive comm_size*sendcount items of type
1909   sendtype (right?)  So why specify recvcount and recvtype?
1910
1911   Anyway, assuming the MPI Spec is correct (seems likely :-) we have:
1912
1913   pre:  (all)        must be readable: (sendbuf,sendcount,sendtype)
1914         (root only): must be writable: (recvbuf,recvcount * comm_size,recvtype)
1915   post: (root only): make readable: (recvbuf,recvcount * comm_size,recvtype)
1916*/
1917int WRAPPER_FOR(PMPI_Gather)(
1918       void *sendbuf, int sendcount, MPI_Datatype sendtype,
1919       void *recvbuf, int recvcount, MPI_Datatype recvtype,
1920       int root, MPI_Comm comm)
1921{
1922   OrigFn fn;
1923   int    err, me, sz;
1924   VALGRIND_GET_ORIG_FN(fn);
1925   before("Gather");
1926   me = comm_rank(comm);
1927   sz = comm_size(comm);
1928   check_mem_is_defined(sendbuf, sendcount, sendtype);
1929   if (me == root)
1930      check_mem_is_addressable(recvbuf, recvcount * sz, recvtype);
1931   CALL_FN_W_8W(err, fn, sendbuf,sendcount,sendtype,
1932                         recvbuf,recvcount,recvtype,
1933                         root,comm);
1934   if (me == root)
1935      make_mem_defined_if_addressable_if_success(err, recvbuf, recvcount * sz, recvtype);
1936   after("Gather", err);
1937   return err;
1938}
1939
1940
1941/*------------------------------------------------------------*/
1942/*---                                                      ---*/
1943/*--- Sec 4.6, Scatter                                     ---*/
1944/*---                                                      ---*/
1945/*------------------------------------------------------------*/
1946
1947/* pre:  (root only): must be readable: (sendbuf,sendcount * comm_size,sendtype)
1948         (all):       must be writable: (recvbuf,recvbuf,recvtype)
1949   post: (all):       make defined: (recvbuf,recvbuf,recvtype)
1950*/
1951int WRAPPER_FOR(PMPI_Scatter)(
1952       void* sendbuf, int sendcount, MPI_Datatype sendtype,
1953       void* recvbuf, int recvcount, MPI_Datatype recvtype,
1954       int root, MPI_Comm comm)
1955{
1956   OrigFn fn;
1957   int    err, me, sz;
1958   VALGRIND_GET_ORIG_FN(fn);
1959   before("Scatter");
1960   me = comm_rank(comm);
1961   sz = comm_size(comm);
1962   check_mem_is_addressable(recvbuf, recvcount, recvtype);
1963   if (me == root)
1964      check_mem_is_defined(sendbuf, sendcount * sz, sendtype);
1965   CALL_FN_W_8W(err, fn, sendbuf,sendcount,sendtype,
1966                         recvbuf,recvcount,recvtype,
1967                         root,comm);
1968   make_mem_defined_if_addressable_if_success(err, recvbuf, recvcount, recvtype);
1969   after("Scatter", err);
1970   return err;
1971}
1972
1973
1974/*------------------------------------------------------------*/
1975/*---                                                      ---*/
1976/*--- Sec 4.8, All-to-All Scatter/Gather                   ---*/
1977/*---                                                      ---*/
1978/*------------------------------------------------------------*/
1979
1980/* pre:  (all) must be readable: (sendbuf,sendcount * comm_size,sendtype)
1981         (all) must be writable: (recvbuf,recvcount * comm_size,recvtype)
1982   post: (all) make defined:     (recvbuf,recvcount * comm_size,recvtype)
1983*/
1984int WRAPPER_FOR(PMPI_Alltoall)(
1985       void* sendbuf, int sendcount, MPI_Datatype sendtype,
1986       void* recvbuf, int recvcount, MPI_Datatype recvtype,
1987       MPI_Comm comm)
1988{
1989   OrigFn fn;
1990   int    err, sz;
1991   VALGRIND_GET_ORIG_FN(fn);
1992   before("Alltoall");
1993   sz = comm_size(comm);
1994   check_mem_is_defined(sendbuf, sendcount * sz, sendtype);
1995   check_mem_is_addressable(recvbuf, recvcount * sz, recvtype);
1996   CALL_FN_W_7W(err, fn, sendbuf,sendcount,sendtype,
1997                         recvbuf,recvcount,recvtype,
1998                         comm);
1999   make_mem_defined_if_addressable_if_success(err, recvbuf, recvcount * sz, recvtype);
2000   after("Alltoall", err);
2001   return err;
2002}
2003
2004
2005/*------------------------------------------------------------*/
2006/*---                                                      ---*/
2007/*--- Sec 4.9, Global Reduction Operations                 ---*/
2008/*---                                                      ---*/
2009/*------------------------------------------------------------*/
2010
2011/* --- Reduce --- */
2012/* rd: (sendbuf,count,datatype) for all
2013   wr: (recvbuf,count,datatype) but only for rank == root
2014*/
2015int WRAPPER_FOR(PMPI_Reduce)(void *sendbuf, void *recvbuf,
2016                             int count,
2017                             MPI_Datatype datatype, MPI_Op op,
2018                             int root, MPI_Comm comm)
2019{
2020   OrigFn fn;
2021   int    err;
2022   Bool  i_am_root;
2023   VALGRIND_GET_ORIG_FN(fn);
2024   before("Reduce");
2025   i_am_root = root == comm_rank(comm);
2026   check_mem_is_defined(sendbuf, count, datatype);
2027   if (i_am_root)
2028      check_mem_is_addressable(recvbuf, count, datatype);
2029   CALL_FN_W_7W(err, fn, sendbuf,recvbuf,count,datatype,op,root,comm);
2030   if (i_am_root)
2031      make_mem_defined_if_addressable_if_success(err, recvbuf, count, datatype);
2032   after("Reduce", err);
2033   return err;
2034}
2035
2036
2037/* --- Allreduce --- */
2038/* rd: (sendbuf,count,datatype) for all
2039   wr: (recvbuf,count,datatype) for all
2040*/
2041int WRAPPER_FOR(PMPI_Allreduce)(void *sendbuf, void *recvbuf,
2042                                int count,
2043                                MPI_Datatype datatype, MPI_Op op,
2044                                MPI_Comm comm)
2045{
2046   OrigFn fn;
2047   int    err;
2048   VALGRIND_GET_ORIG_FN(fn);
2049   before("Allreduce");
2050   check_mem_is_defined(sendbuf, count, datatype);
2051   check_mem_is_addressable(recvbuf, count, datatype);
2052   CALL_FN_W_6W(err, fn, sendbuf,recvbuf,count,datatype,op,comm);
2053   make_mem_defined_if_addressable_if_success(err, recvbuf, count, datatype);
2054   after("Allreduce", err);
2055   return err;
2056}
2057
2058
2059/* --- Op_create --- */
2060/* This is a bit dubious.  I suppose it takes 'function' and
2061   writes something at *op, but who knows what an MPI_Op is?
2062   Can we safely do 'sizeof' on it? */
2063int WRAPPER_FOR(PMPI_Op_create)( MPI_User_function* function,
2064                                 int commute,
2065                                 MPI_Op* op )
2066{
2067   OrigFn fn;
2068   int    err;
2069   VALGRIND_GET_ORIG_FN(fn);
2070   before("Op_create");
2071   check_mem_is_addressable_untyped(op, sizeof(*op));
2072   CALL_FN_W_WWW(err, fn, function,commute,op);
2073   make_mem_defined_if_addressable_if_success_untyped(err, op, sizeof(*op));
2074   after("Op_create", err);
2075   return err;
2076}
2077
2078
2079/*------------------------------------------------------------*/
2080/*---                                                      ---*/
2081/*--- Sec 5.4, Communicator management                     ---*/
2082/*---                                                      ---*/
2083/*------------------------------------------------------------*/
2084
2085/* Hardly seems worth wrapping Comm_rank and Comm_size, but
2086   since it's done now .. */
2087
2088/* --- Comm_create --- */
2089/* Let normal memcheck tracking handle this. */
2090int WRAPPER_FOR(PMPI_Comm_create)(MPI_Comm comm, MPI_Group group,
2091                                  MPI_Comm* newcomm)
2092{
2093   OrigFn fn;
2094   int    err;
2095   VALGRIND_GET_ORIG_FN(fn);
2096   before("Comm_create");
2097   CALL_FN_W_WWW(err, fn, comm,group,newcomm);
2098   after("Comm_create", err);
2099   return err;
2100}
2101
2102/* --- Comm_dup --- */
2103/* Let normal memcheck tracking handle this. */
2104int WRAPPER_FOR(PMPI_Comm_dup)(MPI_Comm comm, MPI_Comm* newcomm)
2105{
2106   OrigFn fn;
2107   int    err;
2108   VALGRIND_GET_ORIG_FN(fn);
2109   before("Comm_dup");
2110   CALL_FN_W_WW(err, fn, comm,newcomm);
2111   after("Comm_dup", err);
2112   return err;
2113}
2114
2115/* --- Comm_free --- */
2116/* Let normal memcheck tracking handle this. */
2117int WRAPPER_FOR(PMPI_Comm_free)(MPI_Comm* comm)
2118{
2119   OrigFn fn;
2120   int    err;
2121   VALGRIND_GET_ORIG_FN(fn);
2122   before("Comm_free");
2123   CALL_FN_W_W(err, fn, comm);
2124   after("Comm_free", err);
2125   return err;
2126}
2127
2128/* --- Comm_rank --- */
2129/* wr: (rank, sizeof(*rank)) */
2130int WRAPPER_FOR(PMPI_Comm_rank)(MPI_Comm comm, int *rank)
2131{
2132   OrigFn fn;
2133   int    err;
2134   VALGRIND_GET_ORIG_FN(fn);
2135   before("Comm_rank");
2136   check_mem_is_addressable_untyped(rank, sizeof(*rank));
2137   CALL_FN_W_WW(err, fn, comm,rank);
2138   make_mem_defined_if_addressable_if_success_untyped(err, rank, sizeof(*rank));
2139   after("Comm_rank", err);
2140   return err;
2141}
2142
2143/* --- Comm_size --- */
2144/* wr: (size, sizeof(*size)) */
2145int WRAPPER_FOR(PMPI_Comm_size)(MPI_Comm comm, int *size)
2146{
2147   OrigFn fn;
2148   int    err;
2149   VALGRIND_GET_ORIG_FN(fn);
2150   before("Comm_size");
2151   check_mem_is_addressable_untyped(size, sizeof(*size));
2152   CALL_FN_W_WW(err, fn, comm,size);
2153   make_mem_defined_if_addressable_if_success_untyped(err, size, sizeof(*size));
2154   after("Comm_size", err);
2155   return err;
2156}
2157
2158
2159/*------------------------------------------------------------*/
2160/*---                                                      ---*/
2161/*--- Sec 5.7, Caching                                     ---*/
2162/*---                                                      ---*/
2163/*------------------------------------------------------------*/
2164
2165
2166/*------------------------------------------------------------*/
2167/*---                                                      ---*/
2168/*--- Sec 7.3, Error codes and classes                     ---*/
2169/*---                                                      ---*/
2170/*------------------------------------------------------------*/
2171
2172/* --- Error_string --- */
2173int WRAPPER_FOR(PMPI_Error_string)( int errorcode, char* string,
2174                                    int* resultlen )
2175{
2176   OrigFn fn;
2177   int    err;
2178   VALGRIND_GET_ORIG_FN(fn);
2179   before("Error_string");
2180   check_mem_is_addressable_untyped(resultlen, sizeof(int));
2181   check_mem_is_addressable_untyped(string, MPI_MAX_ERROR_STRING);
2182   CALL_FN_W_WWW(err, fn, errorcode,string,resultlen);
2183   /* Don't bother to paint the result; we assume the real function
2184      will have filled it with defined characters :-) */
2185   after("Error_string", err);
2186   return err;
2187}
2188
2189
2190/*------------------------------------------------------------*/
2191/*---                                                      ---*/
2192/*--- Sec 7.5, Startup                                     ---*/
2193/*---                                                      ---*/
2194/*------------------------------------------------------------*/
2195
2196/* --- Init --- */
2197/* rd: *argc, *argv[0 .. *argc-1] */
2198long WRAPPER_FOR(PMPI_Init)(int *argc, char ***argv)
2199{
2200   OrigFn fn;
2201   int    err;
2202   VALGRIND_GET_ORIG_FN(fn);
2203   before("Init");
2204   if (argc) {
2205      check_mem_is_defined_untyped(argc, sizeof(int));
2206   }
2207   if (argc && argv) {
2208      check_mem_is_defined_untyped(*argv, *argc * sizeof(char**));
2209   }
2210   CALL_FN_W_WW(err, fn, argc,argv);
2211   after("Init", err);
2212   if (opt_initkludge)
2213      return (long)(void*)&mpiwrap_walk_type_EXTERNALLY_VISIBLE;
2214   else
2215      return (long)err;
2216}
2217
2218/* --- Initialized --- */
2219int WRAPPER_FOR(PMPI_Initialized)(int* flag)
2220{
2221   OrigFn fn;
2222   int    err;
2223   VALGRIND_GET_ORIG_FN(fn);
2224   before("Initialized");
2225   check_mem_is_addressable_untyped(flag, sizeof(int));
2226   CALL_FN_W_W(err, fn, flag);
2227   make_mem_defined_if_addressable_if_success_untyped(err, flag, sizeof(int));
2228   after("Initialized", err);
2229   return err;
2230}
2231
2232/* --- Finalize --- */
2233int WRAPPER_FOR(PMPI_Finalize)(void)
2234{
2235   OrigFn fn;
2236   int    err;
2237   VALGRIND_GET_ORIG_FN(fn);
2238   before("Finalize");
2239   CALL_FN_W_v(err, fn);
2240   after("Finalize", err);
2241   return err;
2242}
2243
2244
2245/*------------------------------------------------------------*/
2246/*---                                                      ---*/
2247/*--- Default wrappers for all remaining functions         ---*/
2248/*---                                                      ---*/
2249/*------------------------------------------------------------*/
2250
2251/* Boilerplate for default wrappers. */
2252#define DEFAULT_WRAPPER_PREAMBLE(basename)                        \
2253      OrigFn fn;                                                  \
2254      UWord  res;                                                 \
2255      static int complaints = 1;                                  \
2256      VALGRIND_GET_ORIG_FN(fn);                                   \
2257      before(#basename);                                          \
2258      if (opt_missing >= 2) {                                     \
2259         barf("no wrapper for PMPI_" #basename                    \
2260              ",\n\t\t\t     and you have "                       \
2261              "requested strict checking");                       \
2262      }                                                           \
2263      if (opt_missing == 1 && complaints > 0) {                   \
2264         fprintf(stderr, "%s %5d: warning: no wrapper "           \
2265                         "for PMPI_" #basename "\n",              \
2266                 preamble, my_pid);                               \
2267         complaints--;                                            \
2268      }                                                           \
2269
2270#define DEFAULT_WRAPPER_W_0W(basename)                            \
2271   UWord WRAPPER_FOR(PMPI_##basename)( void )                     \
2272   {                                                              \
2273      DEFAULT_WRAPPER_PREAMBLE(basename)                          \
2274      CALL_FN_W_v(res, fn);                                       \
2275      return res;                                                 \
2276   }
2277
2278#define DEFAULT_WRAPPER_W_1W(basename)                            \
2279   UWord WRAPPER_FOR(PMPI_##basename)( UWord a1 )                 \
2280   {                                                              \
2281      DEFAULT_WRAPPER_PREAMBLE(basename)                          \
2282      CALL_FN_W_W(res, fn, a1);                                   \
2283      return res;                                                 \
2284   }
2285
2286#define DEFAULT_WRAPPER_W_2W(basename)                            \
2287   UWord WRAPPER_FOR(PMPI_##basename)( UWord a1, UWord a2 )       \
2288   {                                                              \
2289      DEFAULT_WRAPPER_PREAMBLE(basename)                          \
2290      CALL_FN_W_WW(res, fn, a1,a2);                               \
2291      return res;                                                 \
2292   }
2293
2294#define DEFAULT_WRAPPER_W_3W(basename)                            \
2295   UWord WRAPPER_FOR(PMPI_##basename)                             \
2296      ( UWord a1, UWord a2, UWord a3 )                            \
2297   {                                                              \
2298      DEFAULT_WRAPPER_PREAMBLE(basename)                          \
2299      CALL_FN_W_WWW(res, fn, a1,a2,a3);                           \
2300      return res;                                                 \
2301   }
2302
2303#define DEFAULT_WRAPPER_W_4W(basename)                            \
2304   UWord WRAPPER_FOR(PMPI_##basename)                             \
2305      ( UWord a1, UWord a2, UWord a3, UWord a4 )                  \
2306   {                                                              \
2307      DEFAULT_WRAPPER_PREAMBLE(basename)                          \
2308      CALL_FN_W_WWWW(res, fn, a1,a2,a3,a4);                       \
2309      return res;                                                 \
2310   }
2311
2312#define DEFAULT_WRAPPER_W_5W(basename)                            \
2313   UWord WRAPPER_FOR(PMPI_##basename)                             \
2314      ( UWord a1, UWord a2, UWord a3, UWord a4, UWord a5 )        \
2315   {                                                              \
2316      DEFAULT_WRAPPER_PREAMBLE(basename)                          \
2317      CALL_FN_W_5W(res, fn, a1,a2,a3,a4,a5);                      \
2318      return res;                                                 \
2319   }
2320
2321#define DEFAULT_WRAPPER_W_6W(basename)                            \
2322   UWord WRAPPER_FOR(PMPI_##basename)                             \
2323      ( UWord a1, UWord a2, UWord a3, UWord a4, UWord a5,         \
2324        UWord a6 )                                                \
2325   {                                                              \
2326      DEFAULT_WRAPPER_PREAMBLE(basename)                          \
2327      CALL_FN_W_6W(res, fn, a1,a2,a3,a4,a5,a6);                   \
2328      return res;                                                 \
2329   }
2330
2331#define DEFAULT_WRAPPER_W_7W(basename)                            \
2332   UWord WRAPPER_FOR(PMPI_##basename)                             \
2333      ( UWord a1, UWord a2, UWord a3, UWord a4, UWord a5,         \
2334        UWord a6, UWord a7 )                                      \
2335   {                                                              \
2336      DEFAULT_WRAPPER_PREAMBLE(basename)                          \
2337      CALL_FN_W_7W(res, fn, a1,a2,a3,a4,a5,a6,a7);                \
2338      return res;                                                 \
2339   }
2340
2341#define DEFAULT_WRAPPER_W_8W(basename)                            \
2342   UWord WRAPPER_FOR(PMPI_##basename)                             \
2343      ( UWord a1, UWord a2, UWord a3, UWord a4, UWord a5,         \
2344        UWord a6, UWord a7, UWord a8 )                            \
2345   {                                                              \
2346      DEFAULT_WRAPPER_PREAMBLE(basename)                          \
2347      CALL_FN_W_8W(res, fn, a1,a2,a3,a4,a5,a6,a7,a8);             \
2348      return res;                                                 \
2349   }
2350
2351#define DEFAULT_WRAPPER_W_9W(basename)                            \
2352   UWord WRAPPER_FOR(PMPI_##basename)                             \
2353      ( UWord a1, UWord a2, UWord a3, UWord a4, UWord a5,         \
2354        UWord a6, UWord a7, UWord a8, UWord a9 )                  \
2355   {                                                              \
2356      DEFAULT_WRAPPER_PREAMBLE(basename)                          \
2357      CALL_FN_W_9W(res, fn, a1,a2,a3,a4,a5,a6,a7,a8,a9);          \
2358      return res;                                                 \
2359   }
2360
2361#define DEFAULT_WRAPPER_W_10W(basename)                           \
2362   UWord WRAPPER_FOR(PMPI_##basename)                             \
2363      ( UWord a1, UWord a2, UWord a3, UWord a4, UWord a5,         \
2364        UWord a6, UWord a7, UWord a8, UWord a9, UWord a10 )       \
2365   {                                                              \
2366      DEFAULT_WRAPPER_PREAMBLE(basename)                          \
2367      CALL_FN_W_10W(res, fn, a1,a2,a3,a4,a5,a6,a7,a8,a9,a10);     \
2368      return res;                                                 \
2369   }
2370
2371#define DEFAULT_WRAPPER_W_12W(basename)                           \
2372   UWord WRAPPER_FOR(PMPI_##basename)                             \
2373      ( UWord a1, UWord a2, UWord a3, UWord a4, UWord a5,         \
2374        UWord a6, UWord a7, UWord a8, UWord a9, UWord a10,        \
2375        UWord a11, UWord a12 )                                    \
2376   {                                                              \
2377      DEFAULT_WRAPPER_PREAMBLE(basename)                          \
2378      CALL_FN_W_12W(res, fn, a1,a2,a3,a4,a5,a6,                   \
2379                             a7,a8,a9,a10,a11,a12);               \
2380      return res;                                                 \
2381   }
2382
2383
2384/* Canned summary of MPI-1.1/MPI-2 entry points, as derived from mpi.h
2385   from Open MPI svn rev 9191 (somewhere between Open MPI versions
2386   1.0.1 and 1.1.0). */
2387
2388/* If a function is commented out in this list, it's because it has a
2389   proper wrapper written elsewhere (above here). */
2390
2391DEFAULT_WRAPPER_W_2W(Abort)
2392DEFAULT_WRAPPER_W_9W(Accumulate)
2393DEFAULT_WRAPPER_W_1W(Add_error_class)
2394DEFAULT_WRAPPER_W_2W(Add_error_code)
2395DEFAULT_WRAPPER_W_2W(Add_error_string)
2396/* DEFAULT_WRAPPER_W_2W(Address) */
2397DEFAULT_WRAPPER_W_7W(Allgather)
2398DEFAULT_WRAPPER_W_8W(Allgatherv)
2399DEFAULT_WRAPPER_W_3W(Alloc_mem)
2400/* DEFAULT_WRAPPER_W_6W(Allreduce) */
2401/* DEFAULT_WRAPPER_W_7W(Alltoall) */
2402DEFAULT_WRAPPER_W_9W(Alltoallv)
2403DEFAULT_WRAPPER_W_9W(Alltoallw)
2404DEFAULT_WRAPPER_W_2W(Attr_delete)
2405DEFAULT_WRAPPER_W_4W(Attr_get)
2406DEFAULT_WRAPPER_W_3W(Attr_put)
2407DEFAULT_WRAPPER_W_1W(Barrier)
2408/* DEFAULT_WRAPPER_W_5W(Bcast) */
2409/* DEFAULT_WRAPPER_W_6W(Bsend) */
2410DEFAULT_WRAPPER_W_7W(Bsend_init)
2411DEFAULT_WRAPPER_W_2W(Buffer_attach)
2412DEFAULT_WRAPPER_W_2W(Buffer_detach)
2413/* DEFAULT_WRAPPER_W_1W(Cancel) */
2414DEFAULT_WRAPPER_W_4W(Cart_coords)
2415DEFAULT_WRAPPER_W_6W(Cart_create)
2416DEFAULT_WRAPPER_W_5W(Cart_get)
2417DEFAULT_WRAPPER_W_5W(Cart_map)
2418DEFAULT_WRAPPER_W_3W(Cart_rank)
2419DEFAULT_WRAPPER_W_5W(Cart_shift)
2420DEFAULT_WRAPPER_W_3W(Cart_sub)
2421DEFAULT_WRAPPER_W_2W(Cartdim_get)
2422DEFAULT_WRAPPER_W_1W(Close_port)
2423DEFAULT_WRAPPER_W_5W(Comm_accept)
2424DEFAULT_WRAPPER_W_1W(Comm_c2f)
2425DEFAULT_WRAPPER_W_2W(Comm_call_errhandler)
2426DEFAULT_WRAPPER_W_3W(Comm_compare)
2427DEFAULT_WRAPPER_W_5W(Comm_connect)
2428DEFAULT_WRAPPER_W_2W(Comm_create_errhandler)
2429DEFAULT_WRAPPER_W_4W(Comm_create_keyval)
2430/* DEFAULT_WRAPPER_W_3W(Comm_create) */
2431DEFAULT_WRAPPER_W_2W(Comm_delete_attr)
2432DEFAULT_WRAPPER_W_1W(Comm_disconnect)
2433/* DEFAULT_WRAPPER_W_2W(Comm_dup) */
2434DEFAULT_WRAPPER_W_1W(Comm_f2c)
2435DEFAULT_WRAPPER_W_1W(Comm_free_keyval)
2436/* DEFAULT_WRAPPER_W_1W(Comm_free) */
2437DEFAULT_WRAPPER_W_4W(Comm_get_attr)
2438DEFAULT_WRAPPER_W_2W(Comm_get_errhandler)
2439DEFAULT_WRAPPER_W_3W(Comm_get_name)
2440DEFAULT_WRAPPER_W_1W(Comm_get_parent)
2441DEFAULT_WRAPPER_W_2W(Comm_group)
2442DEFAULT_WRAPPER_W_2W(Comm_join)
2443/* DEFAULT_WRAPPER_W_2W(Comm_rank) */
2444DEFAULT_WRAPPER_W_2W(Comm_remote_group)
2445DEFAULT_WRAPPER_W_2W(Comm_remote_size)
2446DEFAULT_WRAPPER_W_3W(Comm_set_attr)
2447DEFAULT_WRAPPER_W_2W(Comm_set_errhandler)
2448DEFAULT_WRAPPER_W_2W(Comm_set_name)
2449/* DEFAULT_WRAPPER_W_2W(Comm_size) */
2450DEFAULT_WRAPPER_W_8W(Comm_spawn)
2451DEFAULT_WRAPPER_W_9W(Comm_spawn_multiple)
2452DEFAULT_WRAPPER_W_4W(Comm_split)
2453DEFAULT_WRAPPER_W_2W(Comm_test_inter)
2454DEFAULT_WRAPPER_W_3W(Dims_create)
2455DEFAULT_WRAPPER_W_1W(Errhandler_c2f)
2456DEFAULT_WRAPPER_W_2W(Errhandler_create)
2457DEFAULT_WRAPPER_W_1W(Errhandler_f2c)
2458DEFAULT_WRAPPER_W_1W(Errhandler_free)
2459DEFAULT_WRAPPER_W_2W(Errhandler_get)
2460DEFAULT_WRAPPER_W_2W(Errhandler_set)
2461DEFAULT_WRAPPER_W_2W(Error_class)
2462/* DEFAULT_WRAPPER_W_3W(Error_string) */
2463DEFAULT_WRAPPER_W_6W(Exscan)
2464DEFAULT_WRAPPER_W_1W(File_c2f)
2465DEFAULT_WRAPPER_W_1W(File_f2c)
2466DEFAULT_WRAPPER_W_2W(File_call_errhandler)
2467DEFAULT_WRAPPER_W_2W(File_create_errhandler)
2468DEFAULT_WRAPPER_W_2W(File_set_errhandler)
2469DEFAULT_WRAPPER_W_2W(File_get_errhandler)
2470DEFAULT_WRAPPER_W_5W(File_open)
2471DEFAULT_WRAPPER_W_1W(File_close)
2472DEFAULT_WRAPPER_W_2W(File_delete)
2473DEFAULT_WRAPPER_W_2W(File_set_size)
2474DEFAULT_WRAPPER_W_2W(File_preallocate)
2475DEFAULT_WRAPPER_W_2W(File_get_size)
2476DEFAULT_WRAPPER_W_2W(File_get_group)
2477DEFAULT_WRAPPER_W_2W(File_get_amode)
2478DEFAULT_WRAPPER_W_2W(File_set_info)
2479DEFAULT_WRAPPER_W_2W(File_get_info)
2480DEFAULT_WRAPPER_W_6W(File_set_view)
2481DEFAULT_WRAPPER_W_5W(File_get_view)
2482DEFAULT_WRAPPER_W_6W(File_read_at)
2483DEFAULT_WRAPPER_W_6W(File_read_at_all)
2484DEFAULT_WRAPPER_W_6W(File_write_at)
2485DEFAULT_WRAPPER_W_6W(File_write_at_all)
2486DEFAULT_WRAPPER_W_6W(File_iread_at)
2487DEFAULT_WRAPPER_W_6W(File_iwrite_at)
2488DEFAULT_WRAPPER_W_5W(File_read)
2489DEFAULT_WRAPPER_W_5W(File_read_all)
2490DEFAULT_WRAPPER_W_5W(File_write)
2491DEFAULT_WRAPPER_W_5W(File_write_all)
2492DEFAULT_WRAPPER_W_5W(File_iread)
2493DEFAULT_WRAPPER_W_5W(File_iwrite)
2494DEFAULT_WRAPPER_W_3W(File_seek)
2495DEFAULT_WRAPPER_W_2W(File_get_position)
2496DEFAULT_WRAPPER_W_3W(File_get_byte_offset)
2497DEFAULT_WRAPPER_W_5W(File_read_shared)
2498DEFAULT_WRAPPER_W_5W(File_write_shared)
2499DEFAULT_WRAPPER_W_5W(File_iread_shared)
2500DEFAULT_WRAPPER_W_5W(File_iwrite_shared)
2501DEFAULT_WRAPPER_W_5W(File_read_ordered)
2502DEFAULT_WRAPPER_W_5W(File_write_ordered)
2503DEFAULT_WRAPPER_W_3W(File_seek_shared)
2504DEFAULT_WRAPPER_W_2W(File_get_position_shared)
2505DEFAULT_WRAPPER_W_5W(File_read_at_all_begin)
2506DEFAULT_WRAPPER_W_3W(File_read_at_all_end)
2507DEFAULT_WRAPPER_W_5W(File_write_at_all_begin)
2508DEFAULT_WRAPPER_W_3W(File_write_at_all_end)
2509DEFAULT_WRAPPER_W_4W(File_read_all_begin)
2510DEFAULT_WRAPPER_W_3W(File_read_all_end)
2511DEFAULT_WRAPPER_W_4W(File_write_all_begin)
2512DEFAULT_WRAPPER_W_3W(File_write_all_end)
2513DEFAULT_WRAPPER_W_4W(File_read_ordered_begin)
2514DEFAULT_WRAPPER_W_3W(File_read_ordered_end)
2515DEFAULT_WRAPPER_W_4W(File_write_ordered_begin)
2516DEFAULT_WRAPPER_W_3W(File_write_ordered_end)
2517DEFAULT_WRAPPER_W_3W(File_get_type_extent)
2518DEFAULT_WRAPPER_W_2W(File_set_atomicity)
2519DEFAULT_WRAPPER_W_2W(File_get_atomicity)
2520DEFAULT_WRAPPER_W_1W(File_sync)
2521/* DEFAULT_WRAPPER_W_0W(Finalize) */
2522DEFAULT_WRAPPER_W_1W(Finalized)
2523DEFAULT_WRAPPER_W_1W(Free_mem)
2524/* DEFAULT_WRAPPER_W_8W(Gather) */
2525DEFAULT_WRAPPER_W_9W(Gatherv)
2526DEFAULT_WRAPPER_W_2W(Get_address)
2527/* DEFAULT_WRAPPER_W_3W(Get_count) */
2528DEFAULT_WRAPPER_W_3W(Get_elements)
2529DEFAULT_WRAPPER_W_8W(Get)
2530DEFAULT_WRAPPER_W_2W(Get_processor_name)
2531DEFAULT_WRAPPER_W_2W(Get_version)
2532DEFAULT_WRAPPER_W_6W(Graph_create)
2533DEFAULT_WRAPPER_W_5W(Graph_get)
2534DEFAULT_WRAPPER_W_5W(Graph_map)
2535DEFAULT_WRAPPER_W_3W(Graph_neighbors_count)
2536DEFAULT_WRAPPER_W_4W(Graph_neighbors)
2537DEFAULT_WRAPPER_W_3W(Graphdims_get)
2538DEFAULT_WRAPPER_W_1W(Grequest_complete)
2539DEFAULT_WRAPPER_W_5W(Grequest_start)
2540DEFAULT_WRAPPER_W_1W(Group_c2f)
2541DEFAULT_WRAPPER_W_3W(Group_compare)
2542DEFAULT_WRAPPER_W_3W(Group_difference)
2543DEFAULT_WRAPPER_W_4W(Group_excl)
2544DEFAULT_WRAPPER_W_1W(Group_f2c)
2545DEFAULT_WRAPPER_W_1W(Group_free)
2546DEFAULT_WRAPPER_W_4W(Group_incl)
2547DEFAULT_WRAPPER_W_3W(Group_intersection)
2548DEFAULT_WRAPPER_W_4W(Group_range_excl)
2549DEFAULT_WRAPPER_W_4W(Group_range_incl)
2550DEFAULT_WRAPPER_W_2W(Group_rank)
2551DEFAULT_WRAPPER_W_2W(Group_size)
2552DEFAULT_WRAPPER_W_5W(Group_translate_ranks)
2553DEFAULT_WRAPPER_W_3W(Group_union)
2554/* DEFAULT_WRAPPER_W_7W(Ibsend) */
2555DEFAULT_WRAPPER_W_1W(Info_c2f)
2556DEFAULT_WRAPPER_W_1W(Info_create)
2557DEFAULT_WRAPPER_W_2W(Info_delete)
2558DEFAULT_WRAPPER_W_2W(Info_dup)
2559DEFAULT_WRAPPER_W_1W(Info_f2c)
2560DEFAULT_WRAPPER_W_1W(Info_free)
2561DEFAULT_WRAPPER_W_5W(Info_get)
2562DEFAULT_WRAPPER_W_2W(Info_get_nkeys)
2563DEFAULT_WRAPPER_W_3W(Info_get_nthkey)
2564DEFAULT_WRAPPER_W_4W(Info_get_valuelen)
2565DEFAULT_WRAPPER_W_3W(Info_set)
2566/* DEFAULT_WRAPPER_W_2W(Init) */
2567/* DEFAULT_WRAPPER_W_1W(Initialized) */
2568DEFAULT_WRAPPER_W_4W(Init_thread)
2569DEFAULT_WRAPPER_W_6W(Intercomm_create)
2570DEFAULT_WRAPPER_W_3W(Intercomm_merge)
2571/* DEFAULT_WRAPPER_W_5W(Iprobe) */
2572/* DEFAULT_WRAPPER_W_7W(Irecv) */
2573/* DEFAULT_WRAPPER_W_7W(Irsend) */
2574/* DEFAULT_WRAPPER_W_7W(Isend) */
2575/* DEFAULT_WRAPPER_W_7W(Issend) */
2576DEFAULT_WRAPPER_W_1W(Is_thread_main)
2577DEFAULT_WRAPPER_W_4W(Keyval_create)
2578DEFAULT_WRAPPER_W_1W(Keyval_free)
2579DEFAULT_WRAPPER_W_3W(Lookup_name)
2580DEFAULT_WRAPPER_W_1W(Op_c2f)
2581/* DEFAULT_WRAPPER_W_3W(Op_create) */
2582DEFAULT_WRAPPER_W_2W(Open_port)
2583DEFAULT_WRAPPER_W_1W(Op_f2c)
2584DEFAULT_WRAPPER_W_1W(Op_free)
2585DEFAULT_WRAPPER_W_7W(Pack_external)
2586DEFAULT_WRAPPER_W_4W(Pack_external_size)
2587/* DEFAULT_WRAPPER_W_7W(Pack) */
2588DEFAULT_WRAPPER_W_4W(Pack_size)
2589/* int MPI_Pcontrol(const int level, ...) */
2590/* DEFAULT_WRAPPER_W_4W(Probe) */
2591DEFAULT_WRAPPER_W_3W(Publish_name)
2592DEFAULT_WRAPPER_W_8W(Put)
2593DEFAULT_WRAPPER_W_1W(Query_thread)
2594DEFAULT_WRAPPER_W_7W(Recv_init)
2595/* DEFAULT_WRAPPER_W_7W(Recv) */
2596/* DEFAULT_WRAPPER_W_7W(Reduce) */
2597DEFAULT_WRAPPER_W_6W(Reduce_scatter)
2598DEFAULT_WRAPPER_W_5W(Register_datarep)
2599DEFAULT_WRAPPER_W_1W(Request_c2f)
2600DEFAULT_WRAPPER_W_1W(Request_f2c)
2601DEFAULT_WRAPPER_W_1W(Request_free)
2602DEFAULT_WRAPPER_W_3W(Request_get_status)
2603/* DEFAULT_WRAPPER_W_6W(Rsend) */
2604DEFAULT_WRAPPER_W_7W(Rsend_init)
2605DEFAULT_WRAPPER_W_6W(Scan)
2606/* DEFAULT_WRAPPER_W_8W(Scatter) */
2607DEFAULT_WRAPPER_W_9W(Scatterv)
2608DEFAULT_WRAPPER_W_7W(Send_init)
2609/* DEFAULT_WRAPPER_W_6W(Send) */
2610/* DEFAULT_WRAPPER_W_12W(Sendrecv) */
2611DEFAULT_WRAPPER_W_9W(Sendrecv_replace)
2612DEFAULT_WRAPPER_W_7W(Ssend_init)
2613/* DEFAULT_WRAPPER_W_6W(Ssend) */
2614DEFAULT_WRAPPER_W_1W(Start)
2615DEFAULT_WRAPPER_W_2W(Startall)
2616DEFAULT_WRAPPER_W_2W(Status_c2f)
2617DEFAULT_WRAPPER_W_2W(Status_f2c)
2618DEFAULT_WRAPPER_W_2W(Status_set_cancelled)
2619DEFAULT_WRAPPER_W_3W(Status_set_elements)
2620/* DEFAULT_WRAPPER_W_4W(Testall) */
2621DEFAULT_WRAPPER_W_5W(Testany)
2622/* DEFAULT_WRAPPER_W_3W(Test) */
2623DEFAULT_WRAPPER_W_2W(Test_cancelled)
2624DEFAULT_WRAPPER_W_5W(Testsome)
2625DEFAULT_WRAPPER_W_2W(Topo_test)
2626DEFAULT_WRAPPER_W_1W(Type_c2f)
2627/* DEFAULT_WRAPPER_W_1W(Type_commit) */
2628DEFAULT_WRAPPER_W_3W(Type_contiguous)
2629DEFAULT_WRAPPER_W_10W(Type_create_darray)
2630DEFAULT_WRAPPER_W_3W(Type_create_f90_complex)
2631DEFAULT_WRAPPER_W_2W(Type_create_f90_integer)
2632DEFAULT_WRAPPER_W_3W(Type_create_f90_real)
2633DEFAULT_WRAPPER_W_5W(Type_create_hindexed)
2634DEFAULT_WRAPPER_W_5W(Type_create_hvector)
2635DEFAULT_WRAPPER_W_4W(Type_create_keyval)
2636DEFAULT_WRAPPER_W_5W(Type_create_indexed_block)
2637DEFAULT_WRAPPER_W_5W(Type_create_struct)
2638DEFAULT_WRAPPER_W_7W(Type_create_subarray)
2639DEFAULT_WRAPPER_W_4W(Type_create_resized)
2640DEFAULT_WRAPPER_W_2W(Type_delete_attr)
2641DEFAULT_WRAPPER_W_2W(Type_dup)
2642/* DEFAULT_WRAPPER_W_2W(Type_extent) */
2643/* DEFAULT_WRAPPER_W_1W(Type_free) */
2644DEFAULT_WRAPPER_W_1W(Type_free_keyval)
2645DEFAULT_WRAPPER_W_1W(Type_f2c)
2646DEFAULT_WRAPPER_W_4W(Type_get_attr)
2647/* DEFAULT_WRAPPER_W_7W(Type_get_contents) */
2648/* DEFAULT_WRAPPER_W_5W(Type_get_envelope) */
2649DEFAULT_WRAPPER_W_3W(Type_get_extent)
2650DEFAULT_WRAPPER_W_3W(Type_get_name)
2651DEFAULT_WRAPPER_W_3W(Type_get_true_extent)
2652DEFAULT_WRAPPER_W_5W(Type_hindexed)
2653DEFAULT_WRAPPER_W_5W(Type_hvector)
2654DEFAULT_WRAPPER_W_5W(Type_indexed)
2655DEFAULT_WRAPPER_W_2W(Type_lb)
2656DEFAULT_WRAPPER_W_3W(Type_match_size)
2657DEFAULT_WRAPPER_W_3W(Type_set_attr)
2658DEFAULT_WRAPPER_W_2W(Type_set_name)
2659DEFAULT_WRAPPER_W_2W(Type_size)
2660DEFAULT_WRAPPER_W_5W(Type_struct)
2661DEFAULT_WRAPPER_W_2W(Type_ub)
2662DEFAULT_WRAPPER_W_5W(Type_vector)
2663/* DEFAULT_WRAPPER_W_7W(Unpack) */
2664DEFAULT_WRAPPER_W_3W(Unpublish_name)
2665DEFAULT_WRAPPER_W_7W(Unpack_external)
2666/* DEFAULT_WRAPPER_W_3W(Waitall) */
2667/* DEFAULT_WRAPPER_W_4W(Waitany) */
2668/* DEFAULT_WRAPPER_W_2W(Wait) */
2669DEFAULT_WRAPPER_W_5W(Waitsome)
2670DEFAULT_WRAPPER_W_1W(Win_c2f)
2671DEFAULT_WRAPPER_W_2W(Win_call_errhandler)
2672DEFAULT_WRAPPER_W_1W(Win_complete)
2673DEFAULT_WRAPPER_W_6W(Win_create)
2674DEFAULT_WRAPPER_W_2W(Win_create_errhandler)
2675DEFAULT_WRAPPER_W_4W(Win_create_keyval)
2676DEFAULT_WRAPPER_W_2W(Win_delete_attr)
2677DEFAULT_WRAPPER_W_1W(Win_f2c)
2678DEFAULT_WRAPPER_W_2W(Win_fence)
2679DEFAULT_WRAPPER_W_1W(Win_free)
2680DEFAULT_WRAPPER_W_1W(Win_free_keyval)
2681DEFAULT_WRAPPER_W_4W(Win_get_attr)
2682DEFAULT_WRAPPER_W_2W(Win_get_errhandler)
2683DEFAULT_WRAPPER_W_2W(Win_get_group)
2684DEFAULT_WRAPPER_W_3W(Win_get_name)
2685DEFAULT_WRAPPER_W_4W(Win_lock)
2686DEFAULT_WRAPPER_W_3W(Win_post)
2687DEFAULT_WRAPPER_W_3W(Win_set_attr)
2688DEFAULT_WRAPPER_W_2W(Win_set_errhandler)
2689DEFAULT_WRAPPER_W_2W(Win_set_name)
2690DEFAULT_WRAPPER_W_3W(Win_start)
2691DEFAULT_WRAPPER_W_2W(Win_test)
2692DEFAULT_WRAPPER_W_2W(Win_unlock)
2693DEFAULT_WRAPPER_W_1W(Win_wait)
2694/* double MPI_Wtick(void) */
2695/* double MPI_Wtime(void) */
2696
2697
2698/*------------------------------------------------------------*/
2699/*---                                                      ---*/
2700/*---                                                      ---*/
2701/*---                                                      ---*/
2702/*------------------------------------------------------------*/
2703
2704/*---------------------------------------------------------------*/
2705/*--- end                                           mpiwrap.c ---*/
2706/*---------------------------------------------------------------*/
2707