/* SCCS/what(1) identification string for this translation unit; the
   embedded "@(#)" marker lets the what(1) command report the file
   version from a built binary. */
char   netcpu_kstat10_id[]="\
@(#)netcpu_kstat10.c (c) Copyright 2005-2012, Hewlett-Packard Company Version 2.6.0";
3
4#if HAVE_CONFIG_H
5# include <config.h>
6#endif
7
8#include <stdio.h>
9
10#if HAVE_INTTYPES_H
11# include <inttypes.h>
12#else
13# if HAVE_STDINT_H
14#  include <stdint.h>
15# endif
16#endif
17
18#if HAVE_UNISTD_H
19# include <unistd.h>
20#endif
21#if HAVE_STRINGS_H
22# include <strings.h>
23#endif
24#if STDC_HEADERS
25# include <stdlib.h>
26# include <stddef.h>
27#else
28# if HAVE_STDLIB_H
29#  include <stdlib.h>
30# endif
31#endif
32
#include <errno.h>
#include <string.h>

#include <kstat.h>
#include <sys/sysinfo.h>
37
38#include "netsh.h"
39#include "netlib.h"
40
41static kstat_ctl_t *kc = NULL;
42static kid_t kcid = 0;
43
/* one CPU's worth of the nanosecond-granularity time buckets exposed
   by the Solaris 10 "cpu:sys" and "cpu:intrstat" kstats. */
typedef struct cpu_time_counters {
  uint64_t idle;       /* cpu_nsec_idle */
  uint int64_t user;
  uint64_t kernel;     /* cpu_nsec_kernel */
  uint64_t interrupt;  /* sum of the intrstat "time" statistics */
} cpu_time_counters_t;

/* per-CPU snapshots taken at test start and end, their difference,
   and the difference after interrupt time has been proportionally
   subtracted from the idle/user/kernel buckets. indexed by netperf's
   logical CPU id, mapped to kstat instances via lib_cpu_map. */
static cpu_time_counters_t starting_cpu_counters[MAXCPUS];
static cpu_time_counters_t ending_cpu_counters[MAXCPUS];
static cpu_time_counters_t delta_cpu_counters[MAXCPUS];
static cpu_time_counters_t corrected_cpu_counters[MAXCPUS];
55
56static void
57print_cpu_time_counters(char *name, int instance, cpu_time_counters_t *counters)
58{
59  fprintf(where,
60	  "%s[%d]:\n"
61	  "\t idle %llu\n"
62	  "\t user %llu\n"
63	  "\t kernel %llu\n"
64	  "\t interrupt %llu\n",
65	  name,instance,
66	  counters[instance].idle,
67	  counters[instance].user,
68	  counters[instance].kernel,
69	  counters[instance].interrupt);
70}
71
72void
73cpu_util_init(void)
74{
75  kstat_t   *ksp;
76  int i;
77  kc = kstat_open();
78
79  if (kc == NULL) {
80    fprintf(where,
81	    "cpu_util_init: kstat_open: errno %d %s\n",
82	    errno,
83	    strerror(errno));
84    fflush(where);
85    exit(-1);
86  }
87
88  /* lets flesh-out a CPU instance number map since it seems that some
89     systems, not even those which are partitioned, can have
90     non-contiguous CPU numbers.  discovered "the hard way" on a
91     T5220. raj 20080804 */
92  i = 0;
93  for (ksp = kc->kc_chain, i = 0;
94       (ksp != NULL) && (i < MAXCPUS);
95       ksp = ksp->ks_next) {
96    if ((strcmp(ksp->ks_module,"cpu") == 0) &&
97	(strcmp(ksp->ks_name,"sys") == 0)) {
98      if (debug) {
99	fprintf(where,"Mapping CPU instance %d to entry %d\n",
100		ksp->ks_instance,i);
101	fflush(where);
102      }
103      lib_cpu_map[i++] = ksp->ks_instance;
104    }
105  }
106
107  if (MAXCPUS == i) {
108    fprintf(where,
109            "Sorry, this system has more CPUs (%d) than netperf can handle (%d).\n"
110            "Please alter MAXCPUS in netlib.h and recompile.\n",
111            i,
112            MAXCPUS);
113    fflush(where);
114    exit(1);
115  }
116
117  return;
118}
119
120void
121cpu_util_terminate(void)
122{
123  kstat_close(kc);
124  return;
125}
126
127int
128get_cpu_method(void)
129{
130  return KSTAT_10;
131}
132
133static void
134print_unexpected_statistic_warning(char *who, char *what, char *why)
135{
136  if (why) {
137    fprintf(where,
138	    "WARNING! WARNING! WARNING! WARNING!\n"
139	    "%s found an unexpected %s statistic %.16s\n",
140	    who,
141	    why,
142	    what);
143  }
144  else {
145    fprintf(where,
146	    "%s is ignoring statistic %.16s\n",
147	    who,
148	    what);
149  }
150}
151
/* read the idle/user/kernel nanosecond counters from the
   "cpu:<instance>:sys" kstat for logical CPU cpu_num (mapped to a
   kstat instance via lib_cpu_map) and store them in
   counters[cpu_num].  exits netperf if the kstat cannot be found or
   read, or if any of the three expected cpu_nsec_* statistics is
   missing. */
static void
get_cpu_counters(int cpu_num, cpu_time_counters_t *counters)
{

  kstat_t *ksp;
  int found=0;                  /* how many of the three cpu_nsec_* we saw */
  kid_t nkcid;
  kstat_named_t *knp;
  int i;

  ksp = kstat_lookup(kc, "cpu", lib_cpu_map[cpu_num], "sys");
  if ((ksp) && (ksp->ks_type == KSTAT_TYPE_NAMED)) {
    /* happiness and joy, keep going */
    nkcid = kstat_read(kc, ksp, NULL);
    if (nkcid != -1) {
      /* happiness and joy, keep going. we could consider adding a
	 "found < 3" to the end conditions, but then we wouldn't
	 search to the end and find that Sun added some nsec. we
	 probably want to see if they add an nsec. raj 2005-01-28 */
      /* walk every named statistic in this kstat */
      for (i = ksp->ks_ndata, knp = ksp->ks_data;
	   i > 0;
	   knp++,i--) {
	/* we would be hosed if the same name could appear twice */
	if (!strcmp("cpu_nsec_idle",knp->name)) {
	  found++;
	  counters[cpu_num].idle = knp->value.ui64;
	}
	else if (!strcmp("cpu_nsec_user",knp->name)) {
	  found++;
	  counters[cpu_num].user = knp->value.ui64;
	}
	else if (!strcmp("cpu_nsec_kernel",knp->name)) {
	  found++;
	  counters[cpu_num].kernel = knp->value.ui64;
	}
	else if (!strcmp("cpu_nsec_intr",knp->name)) {
	  /* deliberately NOT counted toward "found" - interrupt time
	     comes from the separate intrstat kstat instead */
	  if (debug >= 2) {
	    fprintf(where,
		    "Found a cpu_nsec_intr but it doesn't do what we want\n");
	    fflush(where);
	  }
	}
	else if (strstr(knp->name,"nsec")) {
	  /* finding another nsec here means Sun have changed
	     something and we need to warn the user. raj 2005-01-28 */
	  print_unexpected_statistic_warning("get_cpu_counters",
					     knp->name,
					     "nsec");
	}
	else if (debug >=2) {

	  /* might want to tell people about what we are skipping.
	     however, only display other names debug >=2. raj
	     2005-01-28  */

	  print_unexpected_statistic_warning("get_cpu_counters",
					     knp->name,
					     NULL);
	}
      }
      if (3 == found) {
	/* happiness and joy - all of idle, user and kernel were seen */
	return;
      }
      else {
	fprintf(where,
		"get_cpu_counters could not find one or more of the expected counters!\n");
	fflush(where);
	exit(-1);
      }
    }
    else {
      /* the kstat_read returned an error or the chain changed */
      fprintf(where,
	      "get_cpu_counters: kstat_read failed or chain id changed %d %s\n",
	      errno,
	      strerror(errno));
      fflush(where);
      exit(-1);
    }
  }
  else {
    /* the lookup failed or found the wrong type */
    fprintf(where,
	    "get_cpu_counters: kstat_lookup failed for module 'cpu' number %d instance %d name 'sys' and KSTAT_TYPE_NAMED: errno %d %s\n",
	    cpu_num,
	    lib_cpu_map[cpu_num],
	    errno,
	    strerror(errno));
    fflush(where);
    exit(-1);
  }
}
245
246static void
247get_interrupt_counters(int cpu_num, cpu_time_counters_t *counters)
248{
249  kstat_t *ksp;
250  int found=0;
251  kid_t nkcid;
252  kstat_named_t *knp;
253  int i;
254
255  ksp = kstat_lookup(kc, "cpu", lib_cpu_map[cpu_num], "intrstat");
256
257  counters[cpu_num].interrupt = 0;
258  if ((ksp) && (ksp->ks_type == KSTAT_TYPE_NAMED)) {
259    /* happiness and joy, keep going */
260    nkcid = kstat_read(kc, ksp, NULL);
261    if (nkcid != -1) {
262      /* happiness and joy, keep going. we could consider adding a
263	 "found < 15" to the end conditions, but then we wouldn't
264	 search to the end and find that Sun added some "time." we
265	 probably want to see if they add a "nsec." raj 2005-01-28 */
266      for (i = ksp->ks_ndata, knp = ksp->ks_data;
267	   i > 0;
268	   knp++,i--) {
269	if (strstr(knp->name,"time")) {
270	  found++;
271	  counters[cpu_num].interrupt += knp->value.ui64;
272	}
273	else if (debug >=2) {
274
275	  /* might want to tell people about what we are skipping.
276	     however, only display other names debug >=2. raj
277	     2005-01-28
278	  */
279
280	  print_unexpected_statistic_warning("get_cpu_counters",
281					     knp->name,
282					     NULL);
283	}
284      }
285      if (15 == found) {
286	/* happiness and joy */
287	return;
288      }
289      else {
290	fprintf(where,
291		"get_cpu_counters could not find one or more of the expected counters!\n");
292	fflush(where);
293	exit(-1);
294      }
295    }
296    else {
297      /* the kstat_read returned an error or the chain changed */
298      fprintf(where,
299	      "get_cpu_counters: kstat_read failed or chain id changed %d %s\n",
300	      errno,
301	      strerror(errno));
302      fflush(where);
303      exit(-1);
304    }
305  }
306  else {
307    /* the lookup failed or found the wrong type */
308    fprintf(where,
309	    "get_cpu_counters: kstat_lookup failed for module 'cpu' %d instance %d class 'intrstat' and KSTAT_TYPE_NAMED: errno %d %s\n",
310	    cpu_num,
311	    lib_cpu_map[cpu_num],
312	    errno,
313	    strerror(errno));
314    fflush(where);
315    exit(-1);
316  }
317
318}
319
320static void
321get_cpu_time_counters(cpu_time_counters_t *counters)
322{
323
324  int i;
325
326  for (i = 0; i < lib_num_loc_cpus; i++){
327    get_cpu_counters(i, counters);
328    get_interrupt_counters(i, counters);
329  }
330
331  return;
332}
333
334/* the kstat10 mechanism, since it is based on actual nanosecond
335   counters is not going to use a comparison to an idle rate. so, the
336   calibrate_idle_rate routine will be rather simple :) raj 2005-01-28
337   */
338
/* no-op for this mechanism: the kstat10 counters are actual
   nanosecond accumulators, so no idle-loop calibration is needed and
   the parameters are intentionally unused */
float
calibrate_idle_rate(int iterations, int interval)
{
  return (float)0.0;
}
344
345float
346calc_cpu_util_internal(float elapsed_time)
347{
348  int i;
349  float correction_factor;
350  float actual_rate;
351
352  uint64_t total_cpu_nsec;
353
354  /* multiply by 100 and divide by total and you get whole
355     percentages. multiply by 1000 and divide by total and you get
356     tenths of percentages.  multiply by 10000 and divide by total and
357     you get hundredths of percentages. etc etc etc raj 2005-01-28 */
358
359#define CALC_PERCENT 100
360#define CALC_TENTH_PERCENT 1000
361#define CALC_HUNDREDTH_PERCENT 10000
362#define CALC_THOUSANDTH_PERCENT 100000
363#define CALC_ACCURACY CALC_THOUSANDTH_PERCENT
364
365  uint64_t fraction_idle;
366  uint64_t fraction_user;
367  uint64_t fraction_kernel;
368  uint64_t fraction_interrupt;
369
370  uint64_t interrupt_idle;
371  uint64_t interrupt_user;
372  uint64_t interrupt_kernel;
373
374  memset(&lib_local_cpu_stats, 0, sizeof(lib_local_cpu_stats));
375
376  /* It is possible that the library measured a time other than the
377     one that the user want for the cpu utilization calculations - for
378     example, tests that were ended by watchdog timers such as the udp
379     stream test. We let these tests tell up what the elapsed time
380     should be. */
381
382  if (elapsed_time != 0.0) {
383    correction_factor = (float) 1.0 +
384      ((lib_elapsed - elapsed_time) / elapsed_time);
385  }
386  else {
387    correction_factor = (float) 1.0;
388  }
389
390  for (i = 0; i < lib_num_loc_cpus; i++) {
391
392    /* this is now the fun part.  we have the nanoseconds _allegedly_
393       spent in user, idle and kernel.  We also have nanoseconds spent
394       servicing interrupts.  Sadly, in the developer's finite wisdom,
395       the interrupt time accounting is in parallel with the other
396       accounting. this means that time accounted in user, kernel or
397       idle will also include time spent in interrupt.  for netperf's
398       porpoises we do not really care about that for user and kernel,
399       but we certainly do care for idle.  the $64B question becomes -
400       how to "correct" for this?
401
402       we could just subtract interrupt time from idle.  that has the
403       virtue of simplicity and also "punishes" Sun for doing
404       something that seems to be so stupid.  however, we probably
405       have to be "fair" even to the allegedly stupid so the other
406       mechanism, suggested by a Sun engineer is to subtract interrupt
407       time from each of user, kernel and idle in proportion to their
408       numbers.  then we sum the corrected user, kernel and idle along
409       with the interrupt time and use that to calculate a new idle
410       percentage and thus a CPU util percentage.
411
412       that is what we will attempt to do here.  raj 2005-01-28
413
414       of course, we also have to wonder what we should do if there is
415       more interrupt time than the sum of user, kernel and idle.
416       that is a theoretical possibility I suppose, but for the
417       time-being, one that we will blythly ignore, except perhaps for
418       a quick check. raj 2005-01-31
419    */
420
421    /* we ass-u-me that these counters will never wrap during a
422       netperf run.  this may not be a particularly safe thing to
423       do. raj 2005-01-28 */
424    delta_cpu_counters[i].idle = ending_cpu_counters[i].idle -
425      starting_cpu_counters[i].idle;
426    delta_cpu_counters[i].user = ending_cpu_counters[i].user -
427      starting_cpu_counters[i].user;
428    delta_cpu_counters[i].kernel = ending_cpu_counters[i].kernel -
429      starting_cpu_counters[i].kernel;
430    delta_cpu_counters[i].interrupt = ending_cpu_counters[i].interrupt -
431      starting_cpu_counters[i].interrupt;
432
433    if (debug) {
434      print_cpu_time_counters("delta_cpu_counters",i,delta_cpu_counters);
435    }
436
437    /* for this summation, we do not include interrupt time */
438    total_cpu_nsec =
439      delta_cpu_counters[i].idle +
440      delta_cpu_counters[i].user +
441      delta_cpu_counters[i].kernel;
442
443    if (debug) {
444      fprintf(where,"total_cpu_nsec %llu\n",total_cpu_nsec);
445    }
446
447    if (delta_cpu_counters[i].interrupt > total_cpu_nsec) {
448      /* we are not in Kansas any more Toto, and I am not quite sure
449	 the best way to get our tails out of here so let us just
450	 punt. raj 2005-01-31 */
451      fprintf(where,
452	      "WARNING! WARNING! WARNING! WARNING! WARNING! \n"
453	      "calc_cpu_util_internal: more interrupt time than others combined!\n"
454	      "\tso CPU util cannot be estimated\n"
455	      "\t delta[%d].interrupt %llu\n"
456	      "\t delta[%d].idle %llu\n"
457	      "\t delta[%d].user %llu\n"
458	      "\t delta[%d].kernel %llu\n",
459	      i,delta_cpu_counters[i].interrupt,
460	      i,delta_cpu_counters[i].idle,
461	      i,delta_cpu_counters[i].user,
462	      i,delta_cpu_counters[i].kernel);
463      fflush(where);
464
465      lib_local_cpu_stats.cpu_util = -1.0;
466      lib_local_per_cpu_util[i] = -1.0;
467      return -1.0;
468    }
469
470    /* and now some fun with integer math.  i initially tried to
471       promote things to long doubled but that didn't seem to result
472       in happiness and joy. raj 2005-01-28 */
473
474    fraction_idle =
475      (delta_cpu_counters[i].idle * CALC_ACCURACY) / total_cpu_nsec;
476
477    fraction_user =
478      (delta_cpu_counters[i].user * CALC_ACCURACY) / total_cpu_nsec;
479
480    fraction_kernel =
481      (delta_cpu_counters[i].kernel * CALC_ACCURACY) / total_cpu_nsec;
482
483    /* ok, we have our fractions, now we want to take that fraction of
484       the interrupt time and subtract that from the bucket. */
485
486    interrupt_idle =  ((delta_cpu_counters[i].interrupt * fraction_idle) /
487		       CALC_ACCURACY);
488
489    interrupt_user = ((delta_cpu_counters[i].interrupt * fraction_user) /
490		      CALC_ACCURACY);
491
492    interrupt_kernel = ((delta_cpu_counters[i].interrupt * fraction_kernel) /
493			CALC_ACCURACY);
494
495    if (debug) {
496      fprintf(where,
497	      "\tfraction_idle %llu interrupt_idle %llu\n"
498	      "\tfraction_user %llu interrupt_user %llu\n"
499	      "\tfraction_kernel %llu interrupt_kernel %llu\n",
500	      fraction_idle,
501	      interrupt_idle,
502	      fraction_user,
503	      interrupt_user,
504	      fraction_kernel,
505	      interrupt_kernel);
506    }
507
508    corrected_cpu_counters[i].idle = delta_cpu_counters[i].idle -
509      interrupt_idle;
510
511    corrected_cpu_counters[i].user = delta_cpu_counters[i].user -
512      interrupt_user;
513
514    corrected_cpu_counters[i].kernel = delta_cpu_counters[i].kernel -
515      interrupt_kernel;
516
517    corrected_cpu_counters[i].interrupt = delta_cpu_counters[i].interrupt;
518
519    if (debug) {
520      print_cpu_time_counters("corrected_cpu_counters",
521			      i,
522			      corrected_cpu_counters);
523    }
524
525    /* I was going to check for going less than zero, but since all
526       the calculations are in unsigned quantities that would seem to
527       be a triffle silly... raj 2005-01-28 */
528
529    /* ok, now we sum the numbers again, this time including interrupt
530       */
531
532    total_cpu_nsec =
533      corrected_cpu_counters[i].idle +
534      corrected_cpu_counters[i].user +
535      corrected_cpu_counters[i].kernel +
536      corrected_cpu_counters[i].interrupt;
537
538    /* and recalculate our fractions we are really only going to use
539       fraction_idle, but lets calculate the rest just for the heck of
540       it. one day we may want to display them. raj 2005-01-28 */
541
542    /* multiply by 100 and divide by total and you get whole
543       percentages. multiply by 1000 and divide by total and you get
544       tenths of percentages.  multiply by 10000 and divide by total
545       and you get hundredths of percentages. etc etc etc raj
546       2005-01-28 */
547    fraction_idle =
548      (corrected_cpu_counters[i].idle * CALC_ACCURACY) / total_cpu_nsec;
549
550    fraction_user =
551      (corrected_cpu_counters[i].user * CALC_ACCURACY) / total_cpu_nsec;
552
553    fraction_kernel =
554      (corrected_cpu_counters[i].kernel * CALC_ACCURACY) / total_cpu_nsec;
555
556    fraction_interrupt =
557      (corrected_cpu_counters[i].interrupt * CALC_ACCURACY) / total_cpu_nsec;
558
559    if (debug) {
560      fprintf(where,"\tfraction_idle %lu\n",fraction_idle);
561      fprintf(where,"\tfraction_user %lu\n",fraction_user);
562      fprintf(where,"\tfraction_kernel %lu\n",fraction_kernel);
563      fprintf(where,"\tfraction_interrupt %lu\n",fraction_interrupt);
564    }
565
566    /* and finally, what is our CPU utilization? */
567    lib_local_per_cpu_util[i] = 100.0 - (((float)fraction_idle /
568					  (float)CALC_ACCURACY) * 100.0);
569    lib_local_per_cpu_util[i] *= correction_factor;
570    if (debug) {
571      fprintf(where,
572	      "lib_local_per_cpu_util[%d] %g cf %f\n",
573	      i,
574	      lib_local_per_cpu_util[i],
575	      correction_factor);
576    }
577    lib_local_cpu_stats.cpu_util += lib_local_per_cpu_util[i];
578  }
579  /* we want the average across all n processors */
580  lib_local_cpu_stats.cpu_util /= (float)lib_num_loc_cpus;
581
582  return lib_local_cpu_stats.cpu_util;
583}
584
585void
586cpu_start_internal(void)
587{
588  get_cpu_time_counters(starting_cpu_counters);
589  return;
590}
591
592void
593cpu_stop_internal(void)
594{
595  get_cpu_time_counters(ending_cpu_counters);
596}
597