/* Identification string for this source file.  The leading "@(#)" is
   the marker conventionally searched for by what(1). */
char netcpu_kstat10_id[] =
  "@(#)netcpu_kstat10.c (c) Copyright 2005-2007, "
  "Hewlett-Packard Company Version 2.4.3";
3
4#if HAVE_CONFIG_H
5# include <config.h>
6#endif
7
8#include <stdio.h>
9
10#if HAVE_INTTYPES_H
11# include <inttypes.h>
12#else
13# if HAVE_STDINT_H
14#  include <stdint.h>
15# endif
16#endif
17
18#if HAVE_UNISTD_H
19# include <unistd.h>
20#endif
21#if HAVE_STRINGS_H
22# include <strings.h>
23#endif
24#if STDC_HEADERS
25# include <stdlib.h>
26# include <stddef.h>
27#else
28# if HAVE_STDLIB_H
29#  include <stdlib.h>
30# endif
31#endif
32
33#include <errno.h>
34
35#include <kstat.h>
36#include <sys/sysinfo.h>
37
38#include "netsh.h"
39#include "netlib.h"
40
41static kstat_ctl_t *kc = NULL;
42static kid_t kcid = 0;
43
/* One CPU's worth of accumulated time counters as gathered from the
   kstat interface. */
struct cpu_time_counters {
  uint64_t idle;       /* value of the "cpu_nsec_idle" statistic */
  uint64_t user;       /* value of the "cpu_nsec_user" statistic */
  uint64_t kernel;     /* value of the "cpu_nsec_kernel" statistic */
  uint64_t interrupt;  /* sum of the intrstat statistics whose name
                          contains "time" */
};

typedef struct cpu_time_counters cpu_time_counters_t;
50
51static cpu_time_counters_t starting_cpu_counters[MAXCPUS];
52static cpu_time_counters_t ending_cpu_counters[MAXCPUS];
53static cpu_time_counters_t delta_cpu_counters[MAXCPUS];
54static cpu_time_counters_t corrected_cpu_counters[MAXCPUS];
55
56static void
57print_cpu_time_counters(char *name, int instance, cpu_time_counters_t *counters)
58{
59  fprintf(where,"%s[%d]:\n",name,instance);
60  fprintf(where,
61	  "\t idle %llu\n",counters[instance].idle);
62  fprintf(where,
63	  "\t user %llu\n",counters[instance].user);
64  fprintf(where,
65	  "\t kernel %llu\n",counters[instance].kernel);
66  fprintf(where,
67	  "\t interrupt %llu\n",counters[instance].interrupt);
68}
69
70void
71cpu_util_init(void)
72{
73  kc = kstat_open();
74
75  if (kc == NULL) {
76    fprintf(where,
77	    "cpu_util_init: kstat_open: errno %d %s\n",
78	    errno,
79	    strerror(errno));
80    fflush(where);
81    exit(-1);
82  }
83  return;
84}
85
86void
87cpu_util_terminate(void)
88{
89  kstat_close(kc);
90  return;
91}
92
93int
94get_cpu_method(void)
95{
96  return KSTAT_10;
97}
98
99static void
100print_unexpected_statistic_warning(char *who, char *what, char *why)
101{
102  if (why) {
103    fprintf(where,
104	    "WARNING! WARNING! WARNING! WARNING!\n");
105    fprintf(where,
106	    "%s found an unexpected %s statistic %.16s\n",
107	    who,
108	    why,
109	    what);
110  }
111  else {
112    fprintf(where,
113	    "%s is ignoring statistic %.16s\n",
114	    who,
115	    what);
116  }
117}
118
119static void
120get_cpu_counters(int cpu_num, cpu_time_counters_t *counters)
121{
122
123  kstat_t *ksp;
124  int found=0;
125  kid_t nkcid;
126  kstat_named_t *knp;
127  int i;
128
129  ksp = kstat_lookup(kc, "cpu", cpu_num, "sys");
130  if ((ksp) && (ksp->ks_type == KSTAT_TYPE_NAMED)) {
131    /* happiness and joy, keep going */
132    nkcid = kstat_read(kc, ksp, NULL);
133    if (nkcid != -1) {
134      /* happiness and joy, keep going. we could consider adding a
135	 "found < 3" to the end conditions, but then we wouldn't
136	 search to the end and find that Sun added some nsec. we
137	 probably want to see if they add an nsec. raj 2005-01-28 */
138      for (i = ksp->ks_ndata, knp = ksp->ks_data;
139	   i > 0;
140	   knp++,i--) {
141	/* we would be hosed if the same name could appear twice */
142	if (!strcmp("cpu_nsec_idle",knp->name)) {
143	  found++;
144	  counters[cpu_num].idle = knp->value.ui64;
145	}
146	else if (!strcmp("cpu_nsec_user",knp->name)) {
147	  found++;
148	  counters[cpu_num].user = knp->value.ui64;
149	}
150	else if (!strcmp("cpu_nsec_kernel",knp->name)) {
151	  found++;
152	  counters[cpu_num].kernel = knp->value.ui64;
153	}
154	else if (strstr(knp->name,"nsec")) {
155	  /* finding another nsec here means Sun have changed
156	     something and we need to warn the user. raj 2005-01-28 */
157	  print_unexpected_statistic_warning("get_cpu_counters",
158					     knp->name,
159					     "nsec");
160	}
161	else if (debug >=2) {
162
163	  /* might want to tell people about what we are skipping.
164	     however, only display other names debug >=2. raj
165	     2005-01-28
166	  */
167
168	  print_unexpected_statistic_warning("get_cpu_counters",
169					     knp->name,
170					     NULL);
171	}
172      }
173      if (3 == found) {
174	/* happiness and joy */
175	return;
176      }
177      else {
178	fprintf(where,
179		"get_cpu_counters could not find one or more of the expected counters!\n");
180	fflush(where);
181	exit(-1);
182      }
183    }
184    else {
185      /* the kstat_read returned an error or the chain changed */
186      fprintf(where,
187	      "get_cpu_counters: kstat_read failed or chain id changed %d %s\n",
188	      errno,
189	      strerror(errno));
190      fflush(where);
191      exit(-1);
192    }
193  }
194  else {
195    /* the lookup failed or found the wrong type */
196    fprintf(where,
197	    "get_cpu_counters: kstat_lookup failed for module 'cpu' instance %d name 'sys' and KSTAT_TYPE_NAMED: errno %d %s\n",
198	    cpu_num,
199	    errno,
200	    strerror(errno));
201    fflush(where);
202    exit(-1);
203  }
204}
205
206static void
207get_interrupt_counters(int cpu_num, cpu_time_counters_t *counters)
208{
209  kstat_t *ksp;
210  int found=0;
211  kid_t nkcid;
212  kstat_named_t *knp;
213  int i;
214
215  ksp = kstat_lookup(kc, "cpu", cpu_num, "intrstat");
216
217  counters[cpu_num].interrupt = 0;
218  if ((ksp) && (ksp->ks_type == KSTAT_TYPE_NAMED)) {
219    /* happiness and joy, keep going */
220    nkcid = kstat_read(kc, ksp, NULL);
221    if (nkcid != -1) {
222      /* happiness and joy, keep going. we could consider adding a
223	 "found < 15" to the end conditions, but then we wouldn't
224	 search to the end and find that Sun added some "time." we
225	 probably want to see if they add a "nsec." raj 2005-01-28 */
226      for (i = ksp->ks_ndata, knp = ksp->ks_data;
227	   i > 0;
228	   knp++,i--) {
229	if (strstr(knp->name,"time")) {
230	  found++;
231	  counters[cpu_num].interrupt += knp->value.ui64;
232	}
233	else if (debug >=2) {
234
235	  /* might want to tell people about what we are skipping.
236	     however, only display other names debug >=2. raj
237	     2005-01-28
238	  */
239
240	  print_unexpected_statistic_warning("get_cpu_counters",
241					     knp->name,
242					     NULL);
243	}
244      }
245      if (15 == found) {
246	/* happiness and joy */
247	return;
248      }
249      else {
250	fprintf(where,
251		"get_cpu_counters could not find one or more of the expected counters!\n");
252	fflush(where);
253	exit(-1);
254      }
255    }
256    else {
257      /* the kstat_read returned an error or the chain changed */
258      fprintf(where,
259	      "get_cpu_counters: kstat_read failed or chain id changed %d %s\n",
260	      errno,
261	      strerror(errno));
262      fflush(where);
263      exit(-1);
264    }
265  }
266  else {
267    /* the lookup failed or found the wrong type */
268    fprintf(where,
269	    "get_cpu_counters: kstat_lookup failed for module 'cpu' instance %d class 'intrstat' and KSTAT_TYPE_NAMED: errno %d %s\n",
270	    cpu_num,
271	    errno,
272	    strerror(errno));
273    fflush(where);
274    exit(-1);
275  }
276
277}
278
279static void
280get_cpu_time_counters(cpu_time_counters_t *counters)
281{
282
283  int i;
284
285  for (i = 0; i < lib_num_loc_cpus; i++){
286    get_cpu_counters(i, counters);
287    get_interrupt_counters(i, counters);
288  }
289
290  return;
291}
292
/* The kstat10 mechanism works from actual nanosecond counters and so
   does not compare against an idle rate.  There is therefore nothing
   to calibrate: both arguments are ignored and 0.0 is always
   returned.  raj 2005-01-28 */

float
calibrate_idle_rate(int iterations, int interval)
{
  (void)iterations;
  (void)interval;

  return 0.0f;
}
303
304float
305calc_cpu_util_internal(float elapsed_time)
306{
307  int i;
308  float correction_factor;
309  float actual_rate;
310
311  uint64_t total_cpu_nsec;
312
313  /* multiply by 100 and divide by total and you get whole
314     percentages. multiply by 1000 and divide by total and you get
315     tenths of percentages.  multiply by 10000 and divide by total and
316     you get hundredths of percentages. etc etc etc raj 2005-01-28 */
317
318#define CALC_PERCENT 100
319#define CALC_TENTH_PERCENT 1000
320#define CALC_HUNDREDTH_PERCENT 10000
321#define CALC_THOUSANDTH_PERCENT 100000
322#define CALC_ACCURACY CALC_THOUSANDTH_PERCENT
323
324  uint64_t fraction_idle;
325  uint64_t fraction_user;
326  uint64_t fraction_kernel;
327  uint64_t fraction_interrupt;
328
329  uint64_t interrupt_idle;
330  uint64_t interrupt_user;
331  uint64_t interrupt_kernel;
332
333  lib_local_cpu_util = (float)0.0;
334
335  /* It is possible that the library measured a time other than */
336  /* the one that the user want for the cpu utilization */
337  /* calculations - for example, tests that were ended by */
338  /* watchdog timers such as the udp stream test. We let these */
339  /* tests tell up what the elapsed time should be. */
340
341  if (elapsed_time != 0.0) {
342    correction_factor = (float) 1.0 +
343      ((lib_elapsed - elapsed_time) / elapsed_time);
344  }
345  else {
346    correction_factor = (float) 1.0;
347  }
348
349  for (i = 0; i < lib_num_loc_cpus; i++) {
350
351    /* this is now the fun part.  we have the nanoseconds _allegedly_
352       spent in user, idle and kernel.  We also have nanoseconds spent
353       servicing interrupts.  Sadly, in the developer's finite wisdom,
354       the interrupt time accounting is in parallel with the other
355       accounting. this means that time accounted in user, kernel or
356       idle will also include time spent in interrupt.  for netperf's
357       porpoises we do not really care about that for user and kernel,
358       but we certainly do care for idle.  the $64B question becomes -
359       how to "correct" for this?
360
361       we could just subtract interrupt time from idle.  that has the
362       virtue of simplicity and also "punishes" Sun for doing
363       something that seems to be so stupid.  however, we probably
364       have to be "fair" even to the allegedly stupid so the other
365       mechanism, suggested by a Sun engineer is to subtract interrupt
366       time from each of user, kernel and idle in proportion to their
367       numbers.  then we sum the corrected user, kernel and idle along
368       with the interrupt time and use that to calculate a new idle
369       percentage and thus a CPU util percentage.
370
371       that is what we will attempt to do here.  raj 2005-01-28
372
373       of course, we also have to wonder what we should do if there is
374       more interrupt time than the sum of user, kernel and idle.
375       that is a theoretical possibility I suppose, but for the
376       time-being, one that we will blythly ignore, except perhaps for
377       a quick check. raj 2005-01-31
378    */
379
380    /* we ass-u-me that these counters will never wrap during a
381       netperf run.  this may not be a particularly safe thing to
382       do. raj 2005-01-28 */
383    delta_cpu_counters[i].idle = ending_cpu_counters[i].idle -
384      starting_cpu_counters[i].idle;
385    delta_cpu_counters[i].user = ending_cpu_counters[i].user -
386      starting_cpu_counters[i].user;
387    delta_cpu_counters[i].kernel = ending_cpu_counters[i].kernel -
388      starting_cpu_counters[i].kernel;
389    delta_cpu_counters[i].interrupt = ending_cpu_counters[i].interrupt -
390      starting_cpu_counters[i].interrupt;
391
392    if (debug) {
393      print_cpu_time_counters("delta_cpu_counters",i,delta_cpu_counters);
394    }
395
396    /* for this summation, we do not include interrupt time */
397    total_cpu_nsec =
398      delta_cpu_counters[i].idle +
399      delta_cpu_counters[i].user +
400      delta_cpu_counters[i].kernel;
401
402    if (debug) {
403      fprintf(where,"total_cpu_nsec %llu\n",total_cpu_nsec);
404    }
405
406    if (delta_cpu_counters[i].interrupt > total_cpu_nsec) {
407      /* we are not in Kansas any more Toto, and I am not quite sure
408	 the best way to get our tails out of here so let us just
409	 punt. raj 2005-01-31 */
410      fprintf(where,
411	      "WARNING! WARNING! WARNING! WARNING! WARNING! \n");
412      fprintf(where,
413	      "calc_cpu_util_internal: more interrupt time than others combined!\n");
414      fprintf(where,
415	      "\tso CPU util cannot be estimated\n");
416      fprintf(where,
417	      "\t delta[%d].interrupt %llu\n",i,delta_cpu_counters[i].interrupt);
418      fprintf(where,
419	      "\t delta[%d].idle %llu\n",i,delta_cpu_counters[i].idle);
420      fprintf(where,
421	      "\t delta[%d].user %llu\n",i,delta_cpu_counters[i].user);
422      fprintf(where,
423	      "\t delta[%d].kernel %llu\n",i,delta_cpu_counters[i].kernel);
424      fflush(where);
425
426      lib_local_cpu_util = -1.0;
427      lib_local_per_cpu_util[i] = -1.0;
428      return -1.0;
429    }
430
431    /* and now some fun with integer math.  i initially tried to
432       promote things to long doubled but that didn't seem to result
433       in happiness and joy. raj 2005-01-28 */
434
435    fraction_idle =
436      (delta_cpu_counters[i].idle * CALC_ACCURACY) / total_cpu_nsec;
437
438    fraction_user =
439      (delta_cpu_counters[i].user * CALC_ACCURACY) / total_cpu_nsec;
440
441    fraction_kernel =
442      (delta_cpu_counters[i].kernel * CALC_ACCURACY) / total_cpu_nsec;
443
444    /* ok, we have our fractions, now we want to take that fraction of
445       the interrupt time and subtract that from the bucket. */
446
447    interrupt_idle =  ((delta_cpu_counters[i].interrupt * fraction_idle) /
448		       CALC_ACCURACY);
449
450    interrupt_user = ((delta_cpu_counters[i].interrupt * fraction_user) /
451		      CALC_ACCURACY);
452
453    interrupt_kernel = ((delta_cpu_counters[i].interrupt * fraction_kernel) /
454			CALC_ACCURACY);
455
456    if (debug) {
457      fprintf(where,
458	      "\tfraction_idle %llu interrupt_idle %llu\n",
459	      fraction_idle,
460	      interrupt_idle);
461      fprintf(where,
462	      "\tfraction_user %llu interrupt_user %llu\n",
463	      fraction_user,
464	      interrupt_user);
465      fprintf(where,"\tfraction_kernel %llu interrupt_kernel %llu\n",
466	      fraction_kernel,
467	      interrupt_kernel);
468    }
469
470    corrected_cpu_counters[i].idle = delta_cpu_counters[i].idle -
471      interrupt_idle;
472
473    corrected_cpu_counters[i].user = delta_cpu_counters[i].user -
474      interrupt_user;
475
476    corrected_cpu_counters[i].kernel = delta_cpu_counters[i].kernel -
477      interrupt_kernel;
478
479    corrected_cpu_counters[i].interrupt = delta_cpu_counters[i].interrupt;
480
481    if (debug) {
482      print_cpu_time_counters("corrected_cpu_counters",
483			      i,
484			      corrected_cpu_counters);
485    }
486
487    /* I was going to checkfor going less than zero, but since all the
488       calculations are in unsigned quantities that would seem to be a
489       triffle silly... raj 2005-01-28 */
490
491    /* ok, now we sum the numbers again, this time including interrupt
492       */
493
494    total_cpu_nsec =
495      corrected_cpu_counters[i].idle +
496      corrected_cpu_counters[i].user +
497      corrected_cpu_counters[i].kernel +
498      corrected_cpu_counters[i].interrupt;
499
500    /* and recalculate our fractions we are really only going to use
501       fraction_idle, but lets calculate the rest just for the heck of
502       it. one day we may want to display them. raj 2005-01-28 */
503
504    /* multiply by 100 and divide by total and you get whole
505       percentages. multiply by 1000 and divide by total and you get
506       tenths of percentages.  multiply by 10000 and divide by total
507       and you get hundredths of percentages. etc etc etc raj
508       2005-01-28 */
509    fraction_idle =
510      (corrected_cpu_counters[i].idle * CALC_ACCURACY) / total_cpu_nsec;
511
512    fraction_user =
513      (corrected_cpu_counters[i].user * CALC_ACCURACY) / total_cpu_nsec;
514
515    fraction_kernel =
516      (corrected_cpu_counters[i].kernel * CALC_ACCURACY) / total_cpu_nsec;
517
518    fraction_interrupt =
519      (corrected_cpu_counters[i].interrupt * CALC_ACCURACY) / total_cpu_nsec;
520
521    if (debug) {
522      fprintf(where,"\tfraction_idle %lu\n",fraction_idle);
523      fprintf(where,"\tfraction_user %lu\n",fraction_user);
524      fprintf(where,"\tfraction_kernel %lu\n",fraction_kernel);
525      fprintf(where,"\tfraction_interrupt %lu\n",fraction_interrupt);
526    }
527
528    /* and finally, what is our CPU utilization? */
529    lib_local_per_cpu_util[i] = 100.0 - (((float)fraction_idle /
530					  (float)CALC_ACCURACY) * 100.0);
531    if (debug) {
532      fprintf(where,
533	      "lib_local_per_cpu_util[%d] %g\n",
534	      i,
535	      lib_local_per_cpu_util[i]);
536    }
537    lib_local_cpu_util += lib_local_per_cpu_util[i];
538  }
539  /* we want the average across all n processors */
540  lib_local_cpu_util /= (float)lib_num_loc_cpus;
541
542  lib_local_cpu_util *= correction_factor;
543  return lib_local_cpu_util;
544
545
546}
547
548void
549cpu_start_internal(void)
550{
551  get_cpu_time_counters(starting_cpu_counters);
552  return;
553}
554
555void
556cpu_stop_internal(void)
557{
558  get_cpu_time_counters(ending_cpu_counters);
559}
560