]> granicus.if.org Git - sysstat/commitdiff
Fixed bogus CPU statistics output, which happened when
author Sebastien Godard <sysstat@orange.fr>
Mon, 22 Nov 2010 14:22:01 +0000 (15:22 +0100)
committer Sebastien Godard <sysstat@orange.fr>
Mon, 22 Nov 2010 14:22:01 +0000 (15:22 +0100)
CPU user value from /proc/stat wasn't incremented whereas
CPU guest value was.

From the Fedora Bugzilla database.
Ivana Varekova      2010-10-15 09:05:41 EDT

Description of problem:
The output of sar command is bogus, the value of %usr overflows

Version-Release number of selected component (if applicable):
last upstream (http://sebastien.godard.pagesperso-orange.fr/download.html) -
sysstat-9.1.5

How reproducible:

Steps to Reproduce:
1.# sar -u ALL -P ALL 1 1000
Linux 2.6.32.21-168.fc12.i686 (localhost) _i686_ (2 CPU)
...
02:59:54 PM     CPU     %user     %nice   %system   %iowait    %steal     %idle
02:59:56 PM     all      5.24      0.00      4.52      0.00      0.00     90.24
02:59:56 PM       0 4487176417.15    0.00      7.46      0.00      0.00     85.07
02:59:56 PM       1      3.20      0.00      1.83      0.00      0.00     94.98
....
2.
3.

Actual results:
02:59:56 PM       0 4487176417.15    0.00      7.46      0.00      0.00     85.07
                      ^should be zero

Expected results:
02:59:56 PM       0   0    0.00      7.46      0.00      0.00     85.07

Additional info:
The problem happens if the user value from /proc/stat is not incremented, and the
guest value from /proc/stat for the same cpu is incremented. In this case sar should
output 0 as the %usr value.

(This situation can happen - see the code in kernel: account_guest_time
function) e.g.:

time     cpu  user nice sys idle iowait hardirq softirq steal guest
2:59:55 cpu0 2235996 20046 7569883 24586493 187483 3258 3744 0 55430
2:59:56 cpu0 2235996 20046 7569885 24586498 187482 3258 3744 0 55431

[Remember that user value should already include guest value].

Against 9.1.5:

--- pr_stats.c.orig    2010-09-04 08:05:58.000000000 +0200
+++ pr_stats.c    2010-10-20 10:07:29.719376868 +0200
@@ -171,6 +171,8 @@ __print_funct_t print_cpu_stats(struct a
             else if (DISPLAY_CPU_ALL(a->opt_flags)) {
                 printf("    %6.2f    %6.2f    %6.2f    %6.2f    %6.2f    %6.2f"
                        "    %6.2f    %6.2f    %6.2f\n",
+                       (scc->cpu_user - scc->cpu_guest)<  (scp->cpu_user - scp->cpu_guest) ?
+                       0.0 :
                        ll_sp_value(scp->cpu_user - scp->cpu_guest,
                            scc->cpu_user - scc->cpu_guest,     g_itv),
                        ll_sp_value(scp->cpu_nice,    scc->cpu_nice,    g_itv),

CHANGES
mpstat.c
pidstat.c
pr_stats.c
rndr_stats.c
xml_stats.c

diff --git a/CHANGES b/CHANGES
index 4f1d2e1691948dabd9b39c4471c36bdd65b316c4..b52901c5c28968063ed1dd21655333820f1b8257 100644 (file)
--- a/CHANGES
+++ b/CHANGES
@@ -4,6 +4,9 @@ xxxx/xx/xx: Version 9.1.7 - Sebastien Godard (sysstat <at> orange.fr)
        * sar now tells sadc to read only the necessary groups of
          activities.
        * [Ivana Varekova]: Fix segfaults on bogus localtime input.
+       * Fixed bogus CPU statistics output, which happened when
+         CPU user value from /proc/stat wasn't incremented whereas
+         CPU guest value was.
        * sar manual page updated.
 
 2010/11/10: Version 9.1.6 - Sebastien Godard (sysstat <at> orange.fr)
index 7adcea71d8c77c1c7f92ef4171604bdabddb0499..ca7d46144832aaa455d480a6567a821f5ef3d5c0 100644 (file)
--- a/mpstat.c
+++ b/mpstat.c
@@ -350,6 +350,9 @@ void write_stats_core(int prev, int curr, int dis,
                        printf("%-11s  all", curr_string);
 
                        printf("  %6.2f  %6.2f  %6.2f  %6.2f  %6.2f  %6.2f  %6.2f  %6.2f  %6.2f\n",
+                              (st_cpu[curr]->cpu_user - st_cpu[curr]->cpu_guest) <
+                              (st_cpu[prev]->cpu_user - st_cpu[prev]->cpu_guest) ?
+                              0.0 :
                               ll_sp_value(st_cpu[prev]->cpu_user - st_cpu[prev]->cpu_guest,
                                           st_cpu[curr]->cpu_user - st_cpu[curr]->cpu_guest,
                                           g_itv),
@@ -375,7 +378,7 @@ void write_stats_core(int prev, int curr, int dis,
                                           st_cpu[curr]->cpu_guest,
                                           g_itv),
                               (st_cpu[curr]->cpu_idle < st_cpu[prev]->cpu_idle) ?
-                              0.0 :    /* Handle buggy kernels */
+                              0.0 :
                               ll_sp_value(st_cpu[prev]->cpu_idle,
                                           st_cpu[curr]->cpu_idle,
                                           g_itv));
@@ -429,6 +432,8 @@ void write_stats_core(int prev, int curr, int dis,
                        else {
                                printf("  %6.2f  %6.2f  %6.2f  %6.2f  %6.2f  %6.2f"
                                       "  %6.2f  %6.2f  %6.2f\n",
+                                      (scc->cpu_user - scc->cpu_guest) < (scp->cpu_user - scp->cpu_guest) ?
+                                      0.0 :
                                       ll_sp_value(scp->cpu_user - scp->cpu_guest,
                                                   scc->cpu_user - scc->cpu_guest,
                                                   pc_itv),
index aa0f984299c1cd20d0eee2045d2b890680a34a7e..68e0012c0be95dd710c69afe9ca2da5d33d91ff7 100644 (file)
--- a/pidstat.c
+++ b/pidstat.c
@@ -1134,6 +1134,8 @@ int write_pid_task_all_stats(int prev, int curr, int dis,
 
                if (DISPLAY_CPU(actflag)) {
                        printf(" %7.2f %7.2f %7.2f %7.2f",
+                              (psti->utime - psti->gtime) < (pstj->utime - pstj->gtime) ?
+                              0.0 :
                               SP_VALUE(pstj->utime - pstj->gtime,
                                        psti->utime - psti->gtime, itv),
                               SP_VALUE(pstj->stime,  psti->stime, itv),
@@ -1238,6 +1240,9 @@ int write_pid_child_all_stats(int prev, int curr, int dis,
 
                if (DISPLAY_CPU(actflag)) {
                        printf(" %9.0f %9.0f %9.0f",
+                              (psti->utime + psti->cutime - psti->gtime - psti->cgtime) <
+                              (pstj->utime + pstj->cutime - pstj->gtime - pstj->cgtime) ?
+                              0.0 :
                               (double) ((psti->utime + psti->cutime - psti->gtime - psti->cgtime) -
                                         (pstj->utime + pstj->cutime - pstj->gtime - pstj->cgtime)) /
                               HZ * 1000,
@@ -1307,6 +1312,8 @@ int write_pid_task_cpu_stats(int prev, int curr, int dis, int disp_avg,
        
                print_line_id(curr_string, psti);
                printf(" %7.2f %7.2f %7.2f %7.2f",
+                      (psti->utime - psti->gtime) < (pstj->utime - pstj->gtime) ?
+                      0.0 :
                       SP_VALUE(pstj->utime - pstj->gtime,
                                psti->utime - psti->gtime, itv),
                       SP_VALUE(pstj->stime,  psti->stime, itv),
@@ -1383,6 +1390,9 @@ int write_pid_child_cpu_stats(int prev, int curr, int dis, int disp_avg,
                print_line_id(curr_string, psti);
                if (disp_avg) {
                        printf(" %9.0f %9.0f %9.0f",
+                              (psti->utime + psti->cutime - psti->gtime - psti->cgtime) <
+                              (pstj->utime + pstj->cutime - pstj->gtime - pstj->cgtime) ?
+                              0.0 :
                               (double) ((psti->utime + psti->cutime - psti->gtime - psti->cgtime) -
                                         (pstj->utime + pstj->cutime - pstj->gtime - pstj->cgtime)) /
                               (HZ * psti->uc_asum_count) * 1000,
@@ -1395,6 +1405,9 @@ int write_pid_child_cpu_stats(int prev, int curr, int dis, int disp_avg,
                }
                else {
                        printf(" %9.0f %9.0f %9.0f",
+                              (psti->utime + psti->cutime - psti->gtime - psti->cgtime) <
+                              (pstj->utime + pstj->cutime - pstj->gtime - pstj->cgtime) ?
+                              0.0 :
                               (double) ((psti->utime + psti->cutime - psti->gtime - psti->cgtime) -
                                         (pstj->utime + pstj->cutime - pstj->gtime - pstj->cgtime)) /
                               HZ * 1000,
index 6438ab32d9968e35a6b580bc73faaa7830fd0fd1..86d9a8c7a8dac9340956b8ea50fb85b38a3a465d 100644 (file)
@@ -171,6 +171,8 @@ __print_funct_t print_cpu_stats(struct activity *a, int prev, int curr,
                        else if (DISPLAY_CPU_ALL(a->opt_flags)) {
                                printf("    %6.2f    %6.2f    %6.2f    %6.2f    %6.2f    %6.2f"
                                       "    %6.2f    %6.2f    %6.2f\n",
+                                      (scc->cpu_user - scc->cpu_guest) < (scp->cpu_user - scp->cpu_guest) ?
+                                      0.0 :
                                       ll_sp_value(scp->cpu_user - scp->cpu_guest,
                                                   scc->cpu_user - scc->cpu_guest,     g_itv),
                                       ll_sp_value(scp->cpu_nice,    scc->cpu_nice,    g_itv),
index d661e2efe8f141f11a068b6f2e07225c4f4a703d..de0b840898665b264e1f9907ae515b51a177310f 100644 (file)
@@ -190,6 +190,8 @@ __print_funct_t render_cpu_stats(struct activity *a, int isdb, char *pre,
                                        render(isdb, pre, PT_NOFLAG,
                                               "all\t%%usr", "-1", NULL,
                                               NOVAL,
+                                              (scc->cpu_user - scc->cpu_guest) < (scp->cpu_user - scp->cpu_guest) ?
+                                              0.0 :
                                               ll_sp_value(scp->cpu_user - scp->cpu_guest,
                                                           scc->cpu_user - scc->cpu_guest,
                                                           g_itv));
@@ -291,7 +293,8 @@ __print_funct_t render_cpu_stats(struct activity *a, int isdb, char *pre,
                                        render(isdb, pre, PT_NOFLAG,
                                               "cpu%d\t%%usr", "%d", cons(iv, i - 1, NOVAL),
                                               NOVAL,
-                                              !g_itv ?
+                                              (!g_itv ||
+                                              ((scc->cpu_user - scc->cpu_guest) < (scp->cpu_user - scp->cpu_guest))) ?
                                               0.0 :                    /* CPU is offline or tickless */
                                               ll_sp_value(scp->cpu_user - scp->cpu_guest,
                                                           scc->cpu_user - scc->cpu_guest, g_itv));
index 444a2714a2de6a606ecb2e482339672b700c961f..df9fe1467ea4664c2d9f207e78c7aa4dff89d535 100644 (file)
@@ -263,6 +263,8 @@ __print_funct_t xml_print_cpu_stats(struct activity *a, int curr, int tab,
                                        "guest=\"%.2f\" "
                                        "idle=\"%.2f\"/>",
                                        cpuno,
+                                       (scc->cpu_user - scc->cpu_guest) < (scp->cpu_user - scp->cpu_guest) ?
+                                       0.0 :
                                        ll_sp_value(scp->cpu_user - scp->cpu_guest,
                                                    scc->cpu_user - scc->cpu_guest,     g_itv),
                                        ll_sp_value(scp->cpu_nice,    scc->cpu_nice,    g_itv),