1char   netcpu_looper_id[]="\
2@(#)netcpu_looper.c (c) Copyright 2005-2007. Version 2.4.3";
3
4/* netcpu_looper.c
5
6   Implement the soaker process specific portions of netperf CPU
7   utilization measurements. These are broken-out into a separate file
8   to make life much nicer over in netlib.c which had become a maze of
9   twisty, CPU-util-related, #ifdefs, all different.  raj 2005-01-26
10   */
11
12#ifdef HAVE_CONFIG_H
13#include <config.h>
14#endif
15
16#include <stdio.h>
17
18#ifdef HAVE_FCNTL_H
19# include <fcntl.h>
20#endif
21#if HAVE_UNISTD_H
22# include <unistd.h>
23#endif
24#if defined(HAVE_MMAP) || defined(HAVE_SYS_MMAN_H)
25# include <sys/mman.h>
26#else
27# error netcpu_looper requires mmap
28#endif
29
30#if TIME_WITH_SYS_TIME
31# include <sys/time.h>
32# include <time.h>
33#else
34# if HAVE_SYS_TIME_H
35#  include <sys/time.h>
36# else
37#  include <time.h>
38# endif
39#endif
40
41#if HAVE_SYS_TYPES_H
42# include <sys/types.h>
43#endif
44
45#if HAVE_SYS_WAIT_H
46# include <sys/wait.h>
47#endif
48
49#ifdef HAVE_SIGNAL_H
50#include <signal.h>
51#endif
52
53#ifdef HAVE_ERRNO_H
54#include <errno.h>
55#endif
56
57#include "netsh.h"
58#include "netlib.h"
59
60#define PAGES_PER_CHILD 2
61
/* the lib_start_count and lib_end_count arrays hold the starting
   and ending values of whatever is counting when the system is
   idle. The rate at which this increments during a test is compared
   with a previous calibration to arrive at a CPU utilization
   percentage. raj 2005-01-26 */
67static uint64_t  lib_start_count[MAXCPUS];
68static uint64_t  lib_end_count[MAXCPUS];
69
70static int *cpu_mappings;
71
72static int lib_idle_fd;
73static uint64_t *lib_idle_address[MAXCPUS];
74static long     *lib_base_pointer;
75static pid_t     lib_idle_pids[MAXCPUS];
76static int       lib_loopers_running=0;
77
78/* we used to use this code to bind the loopers, but since we have
79   decided to enable processor affinity for the actual
80   netperf/netserver processes we will use that affinity routine,
81   which happens to know about more systems than this */
82
83#ifdef NOTDEF
/* bind_to_processor - pin the calling looper to one CPU.

   child_num is the index of the looper; the CPU asked for is
   child_num modulo lib_num_loc_cpus.  Only HP-UX, Solaris and WIN32
   have an implementation here; on every other platform the routine
   does nothing.  This is *only* for the looper processes, not the
   processes doing the actual measurement - those float, or not, as
   the operating system sees fit.  raj 10/95, 12/95 */
static void
bind_to_processor(int child_num)
{
#if defined(__hpux)
#include <sys/syscall.h>
#include <sys/mp.h>

  int wanted_cpu = child_num % lib_num_loc_cpus;

  if (debug) {
    fprintf(where,
            "child %d asking for CPU %d as pid %d with %d CPUs\n",
            child_num,
            wanted_cpu,
            getpid(),
            lib_num_loc_cpus);
    fflush(where);
  }

  SETPROCESS(wanted_cpu, getpid());

#elif defined(__sun) && defined(__SVR4)
  /* should only be Solaris */
#include <sys/processor.h>
#include <sys/procset.h>

  int wanted_cpu = child_num % lib_num_loc_cpus;
  int previous_binding;

  if (debug) {
    fprintf(where,
            "bind_to_processor: child %d asking for CPU %d as pid %d with %d CPUs\n",
            child_num,
            wanted_cpu,
            getpid(),
            lib_num_loc_cpus);
    fflush(where);
  }

  /* a failed bind is reported but is not fatal */
  if (processor_bind(P_PID, getpid(), wanted_cpu, &previous_binding) != 0) {
    fprintf(where,"bind_to_processor: unable to perform processor binding\n");
    fprintf(where,"                   errno %d\n",errno);
    fflush(where);
  }

#elif defined(WIN32)

  /* the affinity mask has one bit per CPU */
  if (!SetThreadAffinityMask(GetCurrentThread(), (ULONG_PTR)1 << (child_num % lib_num_loc_cpus))) {
    perror("SetThreadAffinityMask failed");
    fflush(stderr);
  }

  if (debug) {
    fprintf(where,
            "bind_to_processor: child %d asking for CPU %d of %d CPUs\n",
            child_num,
            (child_num % lib_num_loc_cpus),
            lib_num_loc_cpus);
    fflush(where);
  }

#endif /* __hpux, __sun && __SVR4, WIN32 */
}
167#endif
168
169 /* sit_and_spin will just spin about incrementing a value */
170 /* this value will either be in a memory mapped region on Unix shared */
171 /* by each looper process, or something appropriate on Windows/NT */
172 /* (malloc'd or such). This routine is reasonably ugly in that it has */
173 /* priority manipulating code for lots of different operating */
174 /* systems. This routine never returns. raj 1/96 */
175
176static void
177sit_and_spin(int child_index)
178
179{
180  uint64_t *my_counter_ptr;
181
182 /* only use C stuff if we are not WIN32 unless and until we */
183 /* switch from CreateThread to _beginthread. raj 1/96 */
184#ifndef WIN32
185  /* we are the child. we could decide to exec some separate */
186  /* program, but that doesn't really seem worthwhile - raj 4/95 */
187  if (debug > 1) {
188    fprintf(where,
189            "Looper child %d is born, pid %d\n",
190            child_index,
191            getpid());
192    fflush(where);
193  }
194
195#endif /* WIN32 */
196
197  /* reset our base pointer to be at the appropriate offset */
198  my_counter_ptr = (uint64_t *) ((char *)lib_base_pointer +
199                             (netlib_get_page_size() *
200                              PAGES_PER_CHILD * child_index));
201
202  /* in the event we are running on an MP system, it would */
203  /* probably be good to bind the soaker processes to specific */
204  /* processors. I *think* this is the most reasonable thing to */
205  /* do, and would be closes to simulating the information we get */
206  /* on HP-UX with pstat. I could put all the system-specific code */
207  /* here, but will "abstract it into another routine to keep this */
208  /* area more readable. I'll probably do the same thine with the */
209  /* "low pri code" raj 10/95 */
210
211  /* since we are "flying blind" wrt where we should bind the looper
212     processes, we want to use the cpu_map that was prepared by netlib
213     rather than assume that the CPU ids on the system start at zero
214     and are contiguous. raj 2006-04-03 */
215  bind_to_specific_processor(child_index % lib_num_loc_cpus,1);
216
217  for (*my_counter_ptr = 0L;
218       ;
219       (*my_counter_ptr)++) {
220    if (!(*lib_base_pointer % 1)) {
221      /* every once and again, make sure that our process priority is */
222      /* nice and low. also, by making system calls, it may be easier */
223      /* for us to be pre-empted by something that needs to do useful */
224      /* work - like the thread of execution actually sending and */
225      /* receiving data across the network :) */
226#ifdef _AIX
227      int pid,prio;
228
229      prio = PRIORITY;
230      pid = getpid();
231      /* if you are not root, this call will return EPERM - why one */
232      /* cannot change one's own priority to  lower value is beyond */
233      /* me. raj 2/26/96 */
234      setpri(pid, prio);
235#else /* _AIX */
236#ifdef __sgi
237      int pid,prio;
238
239      prio = PRIORITY;
240      pid = getpid();
241      schedctl(NDPRI, pid, prio);
242      sginap(0);
243#else /* __sgi */
244#ifdef WIN32
245      SetThreadPriority(GetCurrentThread(),THREAD_PRIORITY_IDLE);
246#else /* WIN32 */
247#if defined(__sun) && defined(__SVR4)
248#include <sys/types.h>
249#include <sys/priocntl.h>
250#include <sys/rtpriocntl.h>
251#include <sys/tspriocntl.h>
252      /* I would *really* like to know how to use priocntl to make the */
253      /* priority low for this looper process. however, either my mind */
254      /* is addled, or the manpage in section two for priocntl is not */
255      /* terribly helpful - for one, it has no examples :( so, if you */
256      /* can help, I'd love to hear from you. in the meantime, we will */
257      /* rely on nice(39). raj 2/26/96 */
258      nice(39);
259#else /* __sun && __SVR4 */
260      nice(39);
261#endif /* __sun && _SVR4 */
262#endif /* WIN32 */
263#endif /* __sgi */
264#endif /* _AIX */
265    }
266  }
267}
268
269
270
271 /* this routine will start all the looper processes or threads for */
272 /* measuring CPU utilization. */
273
274static void
275start_looper_processes()
276{
277
278  unsigned int      i, file_size;
279
280  /* we want at least two pages for each processor. the */
281  /* child for any one processor will write to the first of his two */
282  /* pages, and the second page will be a buffer in case there is page */
283  /* prefetching. if your system pre-fetches more than a single page, */
284  /* well, you'll have to modify this or live with it :( raj 4/95 */
285
286  file_size = ((netlib_get_page_size() * PAGES_PER_CHILD) *
287               lib_num_loc_cpus);
288
289#ifndef WIN32
290
291  /* we we are not using WINDOWS NT (or 95 actually :), then we want */
292  /* to create a memory mapped region so we can see all the counting */
293  /* rates of the loopers */
294
295  /* could we just use an anonymous memory region for this? it is */
296  /* possible that using a mmap()'ed "real" file, while convenient for */
297  /* debugging, could result in some filesystem activity - like */
298  /* metadata updates? raj 4/96 */
299  lib_idle_fd = open("/tmp/netperf_cpu",O_RDWR | O_CREAT | O_EXCL);
300
301  if (lib_idle_fd == -1) {
302    fprintf(where,"create_looper: file creation; errno %d\n",errno);
303    fflush(where);
304    exit(1);
305  }
306
307  if (chmod("/tmp/netperf_cpu",0644) == -1) {
308    fprintf(where,"create_looper: chmod; errno %d\n",errno);
309    fflush(where);
310    exit(1);
311  }
312
313  /* with the file descriptor in place, lets be sure that the file is */
314  /* large enough. */
315
316  if (truncate("/tmp/netperf_cpu",file_size) == -1) {
317    fprintf(where,"create_looper: truncate: errno %d\n",errno);
318    fflush(where);
319    exit(1);
320  }
321
322  /* the file should be large enough now, so we can mmap it */
323
324  /* if the system does not have MAP_VARIABLE, just define it to */
325  /* be zero. it is only used/needed on HP-UX (?) raj 4/95 */
326#ifndef MAP_VARIABLE
327#define MAP_VARIABLE 0x0000
328#endif /* MAP_VARIABLE */
329#ifndef MAP_FILE
330#define MAP_FILE 0x0000
331#endif /* MAP_FILE */
332  if ((lib_base_pointer = (long *)mmap(NULL,
333                                       file_size,
334                                       PROT_READ | PROT_WRITE,
335                                       MAP_FILE | MAP_SHARED | MAP_VARIABLE,
336                                       lib_idle_fd,
337                                       0)) == (long *)-1) {
338    fprintf(where,"create_looper: mmap: errno %d\n",errno);
339    fflush(where);
340    exit(1);
341  }
342
343
344  if (debug > 1) {
345    fprintf(where,"num CPUs %d, file_size %d, lib_base_pointer %p\n",
346            lib_num_loc_cpus,
347            file_size,
348            lib_base_pointer);
349    fflush(where);
350  }
351
352  /* we should have a valid base pointer. lets fork */
353
354  for (i = 0; i < (unsigned int)lib_num_loc_cpus; i++) {
355    switch (lib_idle_pids[i] = fork()) {
356    case -1:
357      perror("netperf: fork");
358      exit(1);
359    case 0:
360      /* we are the child. we could decide to exec some separate */
361      /* program, but that doesn't really seem worthwhile - raj 4/95 */
362
363      signal(SIGTERM, SIG_DFL);
364      sit_and_spin(i);
365
366      /* we should never really get here, but if we do, just exit(0) */
367      exit(0);
368      break;
369    default:
370      /* we must be the parent */
371      lib_idle_address[i] = (uint64_t *) ((char *)lib_base_pointer +
372                                      (netlib_get_page_size() *
373                                       PAGES_PER_CHILD * i));
374      if (debug) {
375        fprintf(where,"lib_idle_address[%d] is %p\n",
376                i,
377                lib_idle_address[i]);
378        fflush(where);
379      }
380    }
381  }
382#else
383  /* we are compiled -DWIN32 */
384  if ((lib_base_pointer = malloc(file_size)) == NULL) {
385    fprintf(where,
386            "create_looper_process could not malloc %d bytes\n",
387            file_size);
388    fflush(where);
389    exit(1);
390  }
391
392  /* now, create all the threads */
393  for(i = 0; i < (unsigned int)lib_num_loc_cpus; i++) {
394    long place_holder;
395    if ((lib_idle_pids[i] = CreateThread(0,
396                                         0,
397                                         (LPTHREAD_START_ROUTINE)sit_and_spin,
398                                         (LPVOID)(ULONG_PTR)i,
399                                         0,
400                                         &place_holder)) == NULL ) {
401      fprintf(where,
402              "create_looper_process: CreateThread failed\n");
403      fflush(where);
404      /* I wonder if I need to look for other threads to kill? */
405      exit(1);
406    }
407    lib_idle_address[i] = (long *) ((char *)lib_base_pointer +
408                                    (netlib_get_page_size() *
409                                     PAGES_PER_CHILD * i));
410    if (debug) {
411      fprintf(where,"lib_idle_address[%d] is %p\n",
412              i,
413              lib_idle_address[i]);
414      fflush(where);
415    }
416  }
417#endif /* WIN32 */
418
419  /* we need to have the looper processes settled-in before we do */
420  /* anything with them, so lets sleep for say 30 seconds. raj 4/95 */
421
422  sleep(30);
423}
424
425void
426cpu_util_init(void)
427{
428  cpu_method = LOOPER;
429
430  /* we want to get the looper processes going */
431  if (!lib_loopers_running) {
432    start_looper_processes();
433    lib_loopers_running = 1;
434  }
435
436  return;
437}
438
439/* clean-up any left-over CPU util resources - looper processes,
440   files, whatever.  raj 2005-01-26 */
441void
442cpu_util_terminate() {
443
444#ifdef WIN32
445  /* it would seem that if/when the process exits, all the threads */
446  /* will go away too, so I don't think I need any explicit thread */
447  /* killing calls here. raj 1/96 */
448#else
449
450  int i;
451
452  /* now go through and kill-off all the child processes */
453  for (i = 0; i < lib_num_loc_cpus; i++){
454    /* SIGKILL can leave core files behind - thanks to Steinar Haug */
455    /* for pointing that out. */
456    kill(lib_idle_pids[i],SIGTERM);
457  }
458  lib_loopers_running = 0;
459  /* reap the children */
460  while(waitpid(-1, NULL, WNOHANG) > 0) { }
461
462  /* finally, unlink the mmaped file */
463  munmap((caddr_t)lib_base_pointer,
464         ((netlib_get_page_size() * PAGES_PER_CHILD) *
465          lib_num_loc_cpus));
466  unlink("/tmp/netperf_cpu");
467#endif
468  return;
469}
470
471int
472get_cpu_method(void)
473{
474  return LOOPER;
475}
476
477 /* calibrate_looper */
478
479 /* Loop a number of iterations, sleeping interval seconds each and */
480 /* count how high the idle counter gets each time. Return  the */
481 /* measured cpu rate to the calling routine. raj 4/95 */
482
483float
484calibrate_idle_rate (int iterations, int interval)
485{
486
487  uint64_t
488    firstcnt[MAXCPUS],
489    secondcnt[MAXCPUS];
490
491  float
492    elapsed,
493    temp_rate,
494    rate[MAXTIMES],
495    local_maxrate;
496
497  long
498    sec,
499    usec;
500
501  int
502    i,
503    j;
504
505  struct  timeval time1, time2 ;
506  struct  timezone tz;
507
508  if (iterations > MAXTIMES) {
509    iterations = MAXTIMES;
510  }
511
512  local_maxrate = (float)-1.0;
513
514  for(i = 0; i < iterations; i++) {
515    rate[i] = (float)0.0;
516    for (j = 0; j < lib_num_loc_cpus; j++) {
517      firstcnt[j] = *(lib_idle_address[j]);
518    }
519    gettimeofday (&time1, &tz);
520    sleep(interval);
521    gettimeofday (&time2, &tz);
522
523    if (time2.tv_usec < time1.tv_usec)
524      {
525        time2.tv_usec += 1000000;
526        time2.tv_sec -=1;
527      }
528    sec = time2.tv_sec - time1.tv_sec;
529    usec = time2.tv_usec - time1.tv_usec;
530    elapsed = (float)sec + ((float)usec/(float)1000000.0);
531
532    if(debug) {
533      fprintf(where, "Calibration for counter run: %d\n",i);
534      fprintf(where,"\tsec = %ld usec = %ld\n",sec,usec);
535      fprintf(where,"\telapsed time = %g\n",elapsed);
536    }
537
538    for (j = 0; j < lib_num_loc_cpus; j++) {
539      secondcnt[j] = *(lib_idle_address[j]);
540      if(debug) {
541        /* I know that there are situations where compilers know about */
542        /* long long, but the library fucntions do not... raj 4/95 */
543        fprintf(where,
544                "\tfirstcnt[%d] = 0x%8.8lx%8.8lx secondcnt[%d] = 0x%8.8lx%8.8lx\n",
545                j,
546                (uint32_t)(firstcnt[j]>>32),
547                (uint32_t)(firstcnt[j]&0xffffffff),
548                j,
549                (uint32_t)(secondcnt[j]>>32),
550                (uint32_t)(secondcnt[j]&0xffffffff));
551      }
552      /* we assume that it would wrap no more than once. we also */
553      /* assume that the result of subtracting will "fit" raj 4/95 */
554      temp_rate = (secondcnt[j] >= firstcnt[j]) ?
555        (float)(secondcnt[j] - firstcnt[j])/elapsed :
556          (float)(secondcnt[j]-firstcnt[j]+MAXLONG)/elapsed;
557      if (temp_rate > rate[i]) rate[i] = temp_rate;
558      if(debug) {
559        fprintf(where,"\trate[%d] = %g\n",i,rate[i]);
560        fflush(where);
561      }
562      if (local_maxrate < rate[i]) local_maxrate = rate[i];
563    }
564  }
565  if(debug) {
566    fprintf(where,"\tlocal maxrate = %g per sec. \n",local_maxrate);
567    fflush(where);
568  }
569  return local_maxrate;
570}
571
572
573void
574get_cpu_idle (uint64_t *res)
575{
576  int i;
577
578  for (i = 0; i < lib_num_loc_cpus; i++){
579    res[i] = *lib_idle_address[i];
580  }
581
582}
583
584float
585calc_cpu_util_internal(float elapsed_time)
586{
587  int i;
588  float correction_factor;
589  float actual_rate;
590
591  lib_local_cpu_util = (float)0.0;
592  /* It is possible that the library measured a time other than */
593  /* the one that the user want for the cpu utilization */
594  /* calculations - for example, tests that were ended by */
595  /* watchdog timers such as the udp stream test. We let these */
596  /* tests tell up what the elapsed time should be. */
597
598  if (elapsed_time != 0.0) {
599    correction_factor = (float) 1.0 +
600      ((lib_elapsed - elapsed_time) / elapsed_time);
601  }
602  else {
603    correction_factor = (float) 1.0;
604  }
605
606  for (i = 0; i < lib_num_loc_cpus; i++) {
607
608    /* it would appear that on some systems, in loopback, nice is
609     *very* effective, causing the looper process to stop dead in its
610     tracks. if this happens, we need to ensure that the calculation
611     does not go south. raj 6/95 and if we run completely out of idle,
612     the same thing could in theory happen to the USE_KSTAT path. raj
613     8/2000 */
614
615    if (lib_end_count[i] == lib_start_count[i]) {
616      lib_end_count[i]++;
617    }
618
619    actual_rate = (lib_end_count[i] > lib_start_count[i]) ?
620      (float)(lib_end_count[i] - lib_start_count[i])/lib_elapsed :
621      (float)(lib_end_count[i] - lib_start_count[i] +
622	      MAXLONG)/ lib_elapsed;
623    if (debug) {
624      fprintf(where,
625              "calc_cpu_util: actual_rate on processor %d is %f start 0x%8.8lx%8.8lx end 0x%8.8lx%8.8lx\n",
626              i,
627              actual_rate,
628              (uint32_t)(lib_start_count[i]>>32),
629              (uint32_t)(lib_start_count[i]&0xffffffff),
630              (uint32_t)(lib_end_count[i]>>32),
631              (uint32_t)(lib_end_count[i]&0xffffffff));
632    }
633    lib_local_per_cpu_util[i] = (lib_local_maxrate - actual_rate) /
634      lib_local_maxrate * 100;
635    lib_local_cpu_util += lib_local_per_cpu_util[i];
636  }
637  /* we want the average across all n processors */
638  lib_local_cpu_util /= (float)lib_num_loc_cpus;
639
640  lib_local_cpu_util *= correction_factor;
641  return lib_local_cpu_util;
642
643
644}
645void
646cpu_start_internal(void)
647{
648  get_cpu_idle(lib_start_count);
649  return;
650}
651
652void
653cpu_stop_internal(void)
654{
655  get_cpu_idle(lib_end_count);
656}
657