From: Sebastien GODARD Date: Sat, 24 Mar 2018 09:33:20 +0000 (+0100) Subject: sar: Softnet stats: Improve support for offline/online CPUs X-Git-Tag: v11.7.3~8 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=2f2276bc6a2f7dd0abcc62731e830a5c1210aff4;p=sysstat sar: Softnet stats: Improve support for offline/online CPUs When a CPU goes offline, the corresponding line in the /proc/net/softnet_stat file disappears. The problem is that there is no immediate solution to know which line goes with which CPU. To fix this, we now use the /proc/stat file to know which CPU are online and which ones are not. Moreover, when a CPU comes back online, its counters get their original values, which makes sar think they have just jumped from 0: This is what sar displayed before (in this example, CPU 5 goes offline): 10:19:39 CPU total/s dropd/s squeezd/s rx_rps/s flw_lim/s 10:19:41 all 4.00 0.00 0.00 0.00 0.00 10:19:41 0 4.00 0.00 0.00 0.00 0.00 10:19:41 1 0.00 0.00 0.00 0.00 0.00 10:19:41 2 0.00 0.00 0.00 0.00 0.00 10:19:41 3 0.00 0.00 0.00 0.00 0.00 10:19:41 4 0.00 0.00 0.00 0.00 0.00 10:19:41 5 3.00 0.00 0.00 0.00 0.00 10:19:41 6 2147483647.50 0.00 0.00 0.00 0.00 There is a shift in the display resulting in wrong values displayed for CPU 5 and 6. CPU 7 disappears from the list. This commit fixes those problems. Signed-off-by: Sebastien GODARD --- diff --git a/activity.c b/activity.c index 26ed7c9..d7d2a2a 100644 --- a/activity.c +++ b/activity.c @@ -1639,7 +1639,7 @@ struct activity fchost_act = { struct activity softnet_act = { .id = A_NET_SOFT, .options = AO_COLLECTED + AO_COUNTED + AO_CLOSE_MARKUP + - AO_GRAPH_PER_ITEM, + AO_GRAPH_PER_ITEM + AO_PERSISTENT, .magic = ACTIVITY_MAGIC_BASE, .group = G_DEFAULT, #ifdef SOURCE_SADC diff --git a/pr_stats.c b/pr_stats.c index 30c2de9..8a60e61 100644 --- a/pr_stats.c +++ b/pr_stats.c @@ -2914,40 +2914,54 @@ __print_funct_t print_fchost_stats(struct activity *a, int prev, int curr, __print_funct_t print_softnet_stats(struct activity *a, int prev, int curr, unsigned long long itv) { + int i; struct stats_softnet *ssnc = (struct stats_softnet *) a->buf[curr], *ssnp = (struct stats_softnet *) a->buf[prev]; - int i; + unsigned char offline_cpu_bitmap[BITMAP_SIZE(NR_CPUS)] = {0}; if (dis || DISPLAY_ZERO_OMIT(flags)) { print_hdr_line(timestamp[!curr], a, FIRST, 7, 9); } - for (i = 0; (i < a->nr[curr]) && (i < a->bitmap->b_size + 1); i++) { + /* + * @nr[curr] cannot normally be greater than @nr_ini + * (since @nr_ini counts up all CPU, even those offline). + * If this happens, it may be because the machine has been + * restarted with more CPU and no LINUX_RESTART has been + * inserted in file. + */ + if (a->nr[curr] > a->nr_ini) { + a->nr_ini = a->nr[curr]; + } - /* - * The size of a->buf[...] CPU structure may be different from the default - * sizeof(struct stats_pwr_cpufreq) value if data have been read from a file! - * That's why we don't use a syntax like: - * ssnc = (struct stats_softnet *) a->buf[...] + i; - */ - ssnc = (struct stats_softnet *) ((char *) a->buf[curr] + i * a->msize); - ssnp = (struct stats_softnet *) ((char *) a->buf[prev] + i * a->msize); + /* Compute statistics for CPU "all" */ + get_global_soft_statistics(a, prev, curr, flags, offline_cpu_bitmap); + + for (i = 0; (i < a->nr_ini) && (i < a->bitmap->b_size + 1); i++) { /* + * Should current CPU (including CPU "all") be displayed? * Note: a->nr is in [1, NR_CPUS + 1]. * Bitmap size is provided for (NR_CPUS + 1) CPUs. * Anyway, NR_CPUS may vary between the version of sysstat * used by sadc to create a file, and the version of sysstat * used by sar to read it... */ - - /* Should current CPU (including CPU "all") be displayed? */ - if (!(a->bitmap->b_array[i >> 3] & (1 << (i & 0x07)))) + if (!(a->bitmap->b_array[i >> 3] & (1 << (i & 0x07))) || + offline_cpu_bitmap[i >> 3] & (1 << (i & 0x07))) /* No */ continue; + /* + * The size of a->buf[...] CPU structure may be different from the default + * sizeof(struct stats_pwr_cpufreq) value if data have been read from a file! + * That's why we don't use a syntax like: + * ssnc = (struct stats_softnet *) a->buf[...] + i; + */ + ssnc = (struct stats_softnet *) ((char *) a->buf[curr] + i * a->msize); + ssnp = (struct stats_softnet *) ((char *) a->buf[prev] + i * a->msize); - if (DISPLAY_ZERO_OMIT(flags) && !memcmp(ssnp, ssnc,STATS_SOFTNET_SIZE)) + if (DISPLAY_ZERO_OMIT(flags) && !memcmp(ssnp, ssnc, STATS_SOFTNET_SIZE)) continue; printf("%-11s", timestamp[curr]); diff --git a/rd_stats.c b/rd_stats.c index 8b578dd..cd6cab5 100644 --- a/rd_stats.c +++ b/rd_stats.c @@ -2603,58 +2603,47 @@ __nr_t read_fchost(struct stats_fchost *st_fc, __nr_t nr_alloc) * IN: * @st_softnet Structure where stats will be saved. * @nr_alloc Total number of structures allocated. Value is >= 0. + * @online_cpu_bitmap + * Bitmap listing online CPU. * * OUT: * @st_softnet Structure with statistics. * * RETURNS: - * Number of CPU for which statistics have been read. - * 1 means CPU "all", 2 means CPU 0, 3 means CPU 1, etc. - * Or -1 if the buffer was too small and needs to be reallocated. + * 1 if stats have been sucessfully read, or 0 otherwise. *************************************************************************** */ -__nr_t read_softnet(struct stats_softnet *st_softnet, __nr_t nr_alloc) +int read_softnet(struct stats_softnet *st_softnet, __nr_t nr_alloc, + unsigned char online_cpu_bitmap[]) { FILE *fp; struct stats_softnet *st_softnet_i; char line[1024]; - __nr_t cpu_read = 1; /* For CPU "all" */ + int cpu; /* Open /proc/net/softnet_stat file */ if ((fp = fopen(NET_SOFTNET, "r")) == NULL) return 0; - /* - * Init a structure that will contain the values for CPU "all". - * CPU "all" doesn't exist in /proc/net/softnet_stat file, so - * we compute its values as the sum of the values of each CPU. - */ - memset(st_softnet, 0, sizeof(struct stats_softnet)); - - while (fgets(line, sizeof(line), fp) != NULL) { + for (cpu = 1; cpu < nr_alloc; cpu++) { + if (!(online_cpu_bitmap[(cpu - 1) >> 3] & (1 << ((cpu - 1) & 0x07)))) + /* CPU is offline */ + continue; - if (cpu_read + 1 > nr_alloc) { - cpu_read = -1; + if (fgets(line, sizeof(line), fp) == NULL) break; - } - st_softnet_i = st_softnet + cpu_read++; + st_softnet_i = st_softnet + cpu; sscanf(line, "%x %x %x %*x %*x %*x %*x %*x %*x %x %x", &st_softnet_i->processed, &st_softnet_i->dropped, &st_softnet_i->time_squeeze, &st_softnet_i->received_rps, &st_softnet_i->flow_limit); - - st_softnet->processed += st_softnet_i->processed; - st_softnet->dropped += st_softnet_i->dropped; - st_softnet->time_squeeze += st_softnet_i->time_squeeze; - st_softnet->received_rps += st_softnet_i->received_rps; - st_softnet->flow_limit += st_softnet_i->flow_limit; } fclose(fp); - return cpu_read; + return 1; } /*------------------ END: FUNCTIONS USED BY SADC ONLY ---------------------*/ diff --git a/rd_stats.h b/rd_stats.h index 909a46b..93bda7c 100644 --- a/rd_stats.h +++ b/rd_stats.h @@ -781,8 +781,8 @@ __nr_t read_filesystem (struct stats_filesystem *, __nr_t); __nr_t read_fchost (struct stats_fchost *, __nr_t); -__nr_t read_softnet - (struct stats_softnet *, __nr_t); +int read_softnet + (struct stats_softnet *, __nr_t, unsigned char []); #endif /* SOURCE_SADC */ #endif /* _RD_STATS_H */ diff --git a/sa.h b/sa.h index 8d36e0c..dbb48e6 100644 --- a/sa.h +++ b/sa.h @@ -659,6 +659,7 @@ struct record_header { * Indicate that activity's metrics have persistent values when devices * are registered again (this means that when the device is registered again, * the metrics pick the values they had when they had been unregistered). + * Exclusively used for CPU related statistics at the present time. */ #define AO_PERSISTENT 0x08 /* @@ -1257,6 +1258,8 @@ void get_file_timestamp_struct (unsigned int, struct tm *, struct file_header *); unsigned long long get_global_cpu_statistics (struct activity *, int, int, unsigned int, unsigned char []); +void get_global_soft_statistics + (struct activity *, int, int, unsigned int, unsigned char []); void get_itv_value (struct record_header *, struct record_header *, unsigned long long *); int next_slice diff --git a/sa_common.c b/sa_common.c index 3548da6..ea659e4 100644 --- a/sa_common.c +++ b/sa_common.c @@ -2647,17 +2647,20 @@ int print_special_record(struct record_header *record_hdr, unsigned int l_flags, endian_mismatch, arch_64, TRUE); /* - * We don't know if A_CPU activity will be displayed or not. + * We don't know if CPU related activities will be displayed or not. * But if it is the case, @nr_ini will be used in the loop - * to display all processors. So update its value here and + * to process all CPUs. So update their value here and * reallocate buffers if needed. - * NB: We may have nr_allocated=0 here if A_CPU activity has + * NB: We may have nr_allocated=0 here if the activity has * not been collected in file (or if it has an unknown format). */ - p = get_activity_position(act, A_CPU, EXIT_IF_NOT_FOUND); - act[p]->nr_ini = file_hdr->sa_cpu_nr; - if (act[p]->nr_ini > act[p]->nr_allocated) { - reallocate_all_buffers(act[p], act[p]->nr_ini); + for (p = 0; p < NR_ACT; p++) { + if (HAS_PERSISTENT_VALUES(act[p]->options)) { + act[p]->nr_ini = file_hdr->sa_cpu_nr; + if (act[p]->nr_ini > act[p]->nr_allocated) { + reallocate_all_buffers(act[p], act[p]->nr_ini); + } + } } if (!dp) @@ -2839,4 +2842,80 @@ unsigned long long get_global_cpu_statistics(struct activity *a, int prev, int c return deltot_jiffies; } +/* + *************************************************************************** + * Compute softnet statistics for CPU "all" as the sum of individual CPU + * ones. + * Also identify offline CPU. + * + * IN: + * @a Activity structure with statistics. + * @prev Index in array where stats used as reference are. + * @curr Index in array for current sample statistics. + * @flags Flags for common options and system state. + * @offline_cpu_bitmap + * CPU bitmap for offline CPU. + * + * OUT: + * @a Activity structure with updated statistics (those for global + * CPU, and also those for offline CPU). + * @offline_cpu_bitmap + * CPU bitmap with offline CPU. + *************************************************************************** + */ +void get_global_soft_statistics(struct activity *a, int prev, int curr, + unsigned int flags, unsigned char offline_cpu_bitmap[]) +{ + int i; + struct stats_softnet *ssnc, *ssnp; + struct stats_softnet *ssnc_all = (struct stats_softnet *) ((char *) a->buf[curr]); + struct stats_softnet *ssnp_all = (struct stats_softnet *) ((char *) a->buf[prev]); + + /* + * Init structures that will contain values for CPU "all". + * CPU "all" doesn't exist in /proc/net/softnet_stat file, so + * we compute its values as the sum of the values of each CPU. + */ + memset(ssnc_all, 0, sizeof(struct stats_softnet)); + memset(ssnp_all, 0, sizeof(struct stats_softnet)); + + for (i = 1; (i < a->nr_ini) && (i < a->bitmap->b_size + 1); i++) { + + /* + * The size of a->buf[...] CPU structure may be different from the default + * sizeof(struct stats_pwr_cpufreq) value if data have been read from a file! + * That's why we don't use a syntax like: + * ssnc = (struct stats_softnet *) a->buf[...] + i; + */ + ssnc = (struct stats_softnet *) ((char *) a->buf[curr] + i * a->msize); + ssnp = (struct stats_softnet *) ((char *) a->buf[prev] + i * a->msize); + + if (ssnc->processed + ssnc->dropped + ssnc->time_squeeze + + ssnc->received_rps + ssnc->flow_limit == 0) { + /* Assume current CPU is offline */ + *ssnc = *ssnp; + offline_cpu_bitmap[i >> 3] |= 1 << (i & 0x07); + } + + if ((ssnp->processed + ssnp->dropped + ssnp->time_squeeze + + ssnp->received_rps + ssnp->flow_limit == 0) && !WANT_SINCE_BOOT(flags)) { + /* Current CPU back online but no previous sample for it */ + offline_cpu_bitmap[i >> 3] |= 1 << (i & 0x07); + continue; + } + + ssnc_all->processed += ssnc->processed; + ssnc_all->dropped += ssnc->dropped; + ssnc_all->time_squeeze += ssnc->time_squeeze; + ssnc_all->received_rps += ssnc->received_rps; + ssnc_all->flow_limit += ssnc->flow_limit; + + ssnp_all->processed += ssnp->processed; + ssnp_all->dropped += ssnp->dropped; + ssnp_all->time_squeeze += ssnp->time_squeeze; + ssnp_all->received_rps += ssnp->received_rps; + ssnp_all->flow_limit += ssnp->flow_limit; + } +} + #endif /* SOURCE_SADC undefined */ diff --git a/sa_wrap.c b/sa_wrap.c index 50f1302..f46b3a6 100644 --- a/sa_wrap.c +++ b/sa_wrap.c @@ -1058,6 +1058,52 @@ __read_funct_t wrap_read_fchost(struct activity *a) return; } +/* + *************************************************************************** + * Look for online CPU and fill corresponding bitmap. + * + * IN: + * @bitmap_size Size of the CPU bitmap. + * + * OUT: + * @online_cpu_bitmap + * CPU bitmap which has been filled. + * + * RETURNS: + * Number of CPU for which statistics have to be be read. + * 1 means CPU "all", 2 means CPU 0, 3 means CPU 1, etc. + * Or -1 if the buffer was too small and needs to be reallocated. + *************************************************************************** + */ +int get_online_cpu_list(unsigned char online_cpu_bitmap[], int bitmap_size) +{ + FILE *fp; + char line[8192]; + int proc_nr; + + if ((fp = fopen(STAT, "r")) == NULL) + return 0; + + while (fgets(line, sizeof(line), fp) != NULL) { + + if (!strncmp(line, "cpu ", 4)) + continue; + + if (!strncmp(line, "cpu", 3)) { + sscanf(line + 3, "%d", &proc_nr); + } + + if (proc_nr + 1 > bitmap_size) { + fclose(fp); + return -1; + } + online_cpu_bitmap[proc_nr >> 3] |= 1 << (proc_nr & 0x07); + } + + fclose(fp); + return proc_nr + 2; +} + /* *************************************************************************** * Read softnet statistics. @@ -1074,15 +1120,37 @@ __read_funct_t wrap_read_softnet(struct activity *a) struct stats_softnet *st_softnet = (struct stats_softnet *) a->_buf0; __nr_t nr_read = 0; + static unsigned char *online_cpu_bitmap = NULL; + static int bitmap_size = 0; /* Read softnet stats */ do { - nr_read = read_softnet(st_softnet, a->nr_allocated); + /* Allocate bitmap for online CPU */ + if (bitmap_size < a->nr_allocated) { + if ((online_cpu_bitmap = (unsigned char *) realloc(online_cpu_bitmap, + BITMAP_SIZE(a->nr_allocated))) == NULL) { + nr_read = 0; + break; + } + bitmap_size = a->nr_allocated; + } + memset(online_cpu_bitmap, 0, BITMAP_SIZE(a->nr_allocated)); + + /* Get online CPU list */ + nr_read = get_online_cpu_list(online_cpu_bitmap, bitmap_size); + if (!nr_read) + break; if (nr_read < 0) { /* Buffer needs to be reallocated */ st_softnet = (struct stats_softnet *) reallocate_buffer(a); } + else { + if (!read_softnet(st_softnet, a->nr_allocated, online_cpu_bitmap)) { + /* File /proc/net/softnet doesn't exist */ + nr_read = 0; + } + } } while (nr_read < 0);