From cd625e9cad4acf03d0dda0059af337f70b90036b Mon Sep 17 00:00:00 2001 From: Sebastien GODARD Date: Fri, 14 Mar 2014 11:08:00 +0100 Subject: [PATCH] sar: Take into account a change of CPU number in sar datafile (5) The goal of the next few patches to come is to make it possible for other activities than A_CPU (displayed by sar -u) to take into account a change of CPU count in sar data files. These activities must be directly related to CPU: At the present time these are A_PWR_CPUFREQ (displayed by sar -m CPU) and A_PWR_WGHFREQ (displayed by sar -m FREQ). With previous work done, a change from, say 6 to 8 CPU is taken into account by sar -u: $ sar -P ALL -f data Linux 3.9.10-100 (home) 02/08/2014 _x86_64_ (6 CPU) 03:09:53 PM CPU %user %nice %system %iowait %steal %idle 03:09:54 PM all 2.53 0.00 0.68 0.00 0.00 96.79 03:09:54 PM 0 2.04 0.00 1.02 0.00 0.00 96.94 03:09:54 PM 1 3.00 0.00 1.00 0.00 0.00 96.00 03:09:54 PM 2 2.02 0.00 1.01 0.00 0.00 96.97 03:09:54 PM 3 4.04 0.00 1.01 0.00 0.00 94.95 03:09:54 PM 4 3.00 0.00 0.00 0.00 0.00 97.00 03:09:54 PM 5 2.02 0.00 0.00 0.00 0.00 97.98 [...] 03:31:33 PM LINUX RESTART (8 CPU) 04:06:55 PM CPU %user %nice %system %iowait %steal %idle 04:06:56 PM all 0.38 16.62 0.38 0.00 0.00 82.62 04:06:56 PM 0 0.00 37.00 0.00 0.00 0.00 63.00 04:06:56 PM 1 0.00 77.78 1.01 0.00 0.00 21.21 04:06:56 PM 2 0.98 13.73 0.98 0.00 0.00 84.31 04:06:56 PM 3 0.00 5.05 1.01 0.00 0.00 93.94 04:06:56 PM 4 0.00 0.00 0.00 0.00 0.00 100.00 04:06:56 PM 5 0.00 0.00 0.00 0.00 0.00 100.00 04:06:56 PM 6 1.00 0.00 0.00 0.00 0.00 99.00 04:06:56 PM 7 0.00 0.00 0.00 0.00 0.00 100.00 But sar -m CPU still only displays stats for the first 6 CPU: $ sar -P ALL -m CPU -f data Linux 3.9.10-100 (home) 02/08/2014 _x86_64_ (6 CPU) 03:09:53 PM CPU MHz 03:09:54 PM all 3358.02 03:09:54 PM 0 3358.02 03:09:54 PM 1 3358.02 03:09:54 PM 2 3358.02 03:09:54 PM 3 3358.02 03:09:54 PM 4 3358.02 03:09:54 PM 5 3358.02 [...] 
03:31:33 PM LINUX RESTART (8 CPU) 04:06:55 PM CPU MHz 04:06:56 PM all 2200.33 04:06:56 PM 0 1600.00 04:06:56 PM 1 3401.00 04:06:56 PM 2 1600.00 04:06:56 PM 3 3401.00 04:06:56 PM 4 1600.00 04:06:56 PM 5 1600.00 This is what we want to change here. So this current patch: 1) Changes the format of sar data file (this doesn't hurt since 10.3.1 has already changed it. Done again here just in case someone had cloned from 10.3.1 though this version has not been officially released). 2) Adds a new field in data file's header (sa_vol_act_nr), giving the number of activities in file taking into account a change of CPU count. (Such activities, like A_CPU or A_PWR_CPUFREQ are called "volatile activities" here). 3) When a restart mark is inserted, sadc also writes a few additional data giving the new number of CPU for each volatile activity in file. Of course sar has also been updated to read those data and take into account a change of CPU count for all volatile activities to display. Signed-off-by: Sebastien GODARD --- activity.c | 6 +-- sa.h | 30 +++++++----- sa_common.c | 85 ++++++++++++++++++++++------------ sadc.c | 129 ++++++++++++++++++++++++++++++++++++++++------------ sar.c | 33 ++++++++++---- 5 files changed, 200 insertions(+), 83 deletions(-) diff --git a/activity.c b/activity.c index 5f475ba..3ca4380 100644 --- a/activity.c +++ b/activity.c @@ -69,7 +69,7 @@ struct act_bitmap irq_bitmap = { */ struct activity cpu_act = { .id = A_CPU, - .options = AO_COLLECTED + AO_REMANENT + AO_GLOBAL_ITV + AO_MULTIPLE_OUTPUTS, + .options = AO_COLLECTED + AO_VOLATILE + AO_GLOBAL_ITV + AO_MULTIPLE_OUTPUTS, .magic = ACTIVITY_MAGIC_BASE, .group = G_DEFAULT, #ifdef SOURCE_SADC @@ -981,7 +981,7 @@ struct activity net_udp6_act = { /* CPU frequency */ struct activity pwr_cpufreq_act = { .id = A_PWR_CPUFREQ, - .options = AO_NULL, + .options = AO_VOLATILE, .magic = ACTIVITY_MAGIC_BASE, .group = G_POWER, #ifdef SOURCE_SADC @@ -1136,7 +1136,7 @@ struct activity huge_act = { /* CPU weighted 
frequency */ struct activity pwr_wghfreq_act = { .id = A_PWR_WGHFREQ, - .options = AO_NULL, + .options = AO_VOLATILE, .magic = ACTIVITY_MAGIC_BASE, .group = G_POWER, #ifdef SOURCE_SADC diff --git a/sa.h b/sa.h index 532170c..789f1f7 100644 --- a/sa.h +++ b/sa.h @@ -243,13 +243,13 @@ #define AO_SELECTED 0x02 /* * When appending data to a file, the number of items (for every activity) - * is forced to that of the file (number of network interfaces, serial lines, etc.) - * Except for remanent activities like A_CPU: If current - * machine has a different number of CPU than that of the file (but is - * equal to last_cpu_nr) then data will be appended with a number of items - * equal to that of the machine. + * is forced to that of the file (number of network interfaces, serial lines, + * etc.) Exceptions are volatile activities (like A_CPU) whose number of items + * is related to the number of CPUs: If current machine has a different number + * of CPU than that of the file (but is equal to sa_last_cpu_nr) then data + * will be appended with a number of items equal to that of the machine. */ -#define AO_REMANENT 0x04 +#define AO_VOLATILE 0x04 /* * Indicate that the interval of time, given to f_print() function * displaying statistics, should be the interval of time in jiffies @@ -271,7 +271,7 @@ #define IS_COLLECTED(m) (((m) & AO_COLLECTED) == AO_COLLECTED) #define IS_SELECTED(m) (((m) & AO_SELECTED) == AO_SELECTED) -#define IS_REMANENT(m) (((m) & AO_REMANENT) == AO_REMANENT) +#define IS_VOLATILE(m) (((m) & AO_VOLATILE) == AO_VOLATILE) #define NEED_GLOBAL_ITV(m) (((m) & AO_GLOBAL_ITV) == AO_GLOBAL_ITV) #define CLOSE_MARKUP(m) (((m) & AO_CLOSE_MARKUP) == AO_CLOSE_MARKUP) #define HAS_MULTIPLE_OUTPUTS(m) (((m) & AO_MULTIPLE_OUTPUTS) == AO_MULTIPLE_OUTPUTS) @@ -467,8 +467,9 @@ struct activity { * |-- --| * * (*)Note: If it's a special record, we may find a comment instead of - * statistics (R_COMMENT record type) or the number of CPU items (R_RESTART - * record type). 
+ * statistics (R_COMMENT record type) or, if it's a R_RESTART record type, + * structures (of type file_activity) for the volatile + * activities. *************************************************************************** */ @@ -483,7 +484,7 @@ struct activity { * Modified to indicate that the format of the file is * no longer compatible with that of previous sysstat versions. */ -#define FORMAT_MAGIC 0x2172 +#define FORMAT_MAGIC 0x2173 /* Structure for file magic header data */ struct file_magic { @@ -524,11 +525,16 @@ struct file_header { /* * Number of CPU items (1 .. CPU_NR + 1) for the last sample in file. */ - unsigned int last_cpu_nr; + unsigned int sa_last_cpu_nr __attribute__ ((aligned (8))); /* * Number of activities saved in the file */ - unsigned int sa_nr_act __attribute__ ((aligned (8))); + unsigned int sa_nr_act; + /* + * Number of volatile activities in file. This is the number of + * file_activity structures saved after each restart mark in file. + */ + unsigned int sa_vol_act_nr; /* * Current day, month and year. * No need to save DST (Daylight Saving Time) flag, since it is not taken diff --git a/sa_common.c b/sa_common.c index 6dd6cd7..af51fec 100644 --- a/sa_common.c +++ b/sa_common.c @@ -1187,12 +1187,13 @@ void check_file_actlst(int *ifd, char *dfile, struct activity *act[], } /* * NOTA BENE: - * If current activity is A_CPU, we are setting - * act[p]->nr to fal->nr, which is the number of CPU for the - * statistics located between the start of the data file and the - * first restart mark. Remember that the number of CPU can vary + * If current activity is a volatile one then fal->nr is the + * number of items (CPU at the present time as only CPU related + * activities are volatile today) for the statistics located + * between the start of the data file and the first restart mark. + * Volatile activities have a number of items which can vary * in file. In this case, a RESTART record is followed by the - * new number of CPU. 
+ * volatile activity structures. */ act[p]->nr = fal->nr; act[p]->nr2 = fal->nr2; @@ -1244,57 +1245,85 @@ void check_file_actlst(int *ifd, char *dfile, struct activity *act[], /* *************************************************************************** - * Set number of CPU items and reallocate CPU structures accordingly. + * Set number of items for current volatile activity and reallocate its + * structures accordingly. + * NB: As only activities related to CPU can be volatile, the number of + * items corresponds in fact to the number of CPU. * * IN: * @act Array of activities. - * @cpu_nr Number of CPU items. + * @act_nr Number of items for current volatile activity. + * @act_id Activity identification for current volatile activity. + * + * RETURN: + * -1 if unknown activity and 0 otherwise. *************************************************************************** */ -void allocate_cpu_structures(struct activity *act[], unsigned int cpu_nr) +int reallocate_vol_act_structures(struct activity *act[], unsigned int act_nr, + unsigned int act_id) { int j, p; - /* Set new CPU count and reallocate structures */ - p = get_activity_position(act, A_CPU); - act[p]->nr = cpu_nr; + if ((p = get_activity_position(act, act_id)) < 0) + /* Ignore unknown activity */ + return -1; + + act[p]->nr = act_nr; for (j = 0; j < 3; j++) { SREALLOC(act[p]->buf[j], void, act[p]->msize * act[p]->nr * act[p]->nr2); } + + return 0; } /* *************************************************************************** - * Read the new CPU count following a RESTART record. Then set corresponding - * number of items for A_CPU activity and reallocate structures. + * Read the volatile activities structures following a RESTART record. + * Then set number of items for each corresponding activity and reallocate + * structures. * * IN: * @ifd Input file descriptor. * @act Array of activities. + * @file Name of file being read. + * @file_magic file_magic structure filled with file magic header data. 
+ * @vol_act_nr Number of volatile activities structures to read. * * RETURNS: - * New number of CPU count. + * New number of items. + * + * NB: As only activities related to CPU can be volatile, the new number of + * items corresponds in fact to the new number of CPU. *************************************************************************** */ -unsigned int read_new_cpu_nr(int ifd, struct activity *act[]) +__nr_t read_vol_act_structures(int ifd, struct activity *act[], char *file, + struct file_magic *file_magic, + unsigned int vol_act_nr) { - unsigned int new_cpu_nr; - - /* Read new number of CPU following the RESTART record */ - sa_fread(ifd, &new_cpu_nr, sizeof(unsigned int), HARD_SIZE); + struct file_activity file_act; + int item_nr = 0; + int i, rc; - if (!new_cpu_nr) { - /* CPU number cannot be zero */ - fprintf(stderr, _("Bad CPU count saved in file\n")); - close(ifd); - exit(2); + for (i = 0; i < vol_act_nr; i++) { + + sa_fread(ifd, &file_act, FILE_ACTIVITY_SIZE, HARD_SIZE); + + if (file_act.id) { + rc = reallocate_vol_act_structures(act, file_act.nr, file_act.id); + if ((rc == 0) && !item_nr) { + item_nr = file_act.nr; + } + } + /* else ignore empty structures that may exist */ } - /* Set new CPU count and reallocate structures */ - allocate_cpu_structures(act, new_cpu_nr); - - return new_cpu_nr; + if (!item_nr) { + /* All volatile activities structures cannot be empty */ + handle_invalid_sa_file(&ifd, file_magic, file, 0); + } + + return item_nr; } /* diff --git a/sadc.c b/sadc.c index e4932a1..4e764aa 100644 --- a/sadc.c +++ b/sadc.c @@ -64,8 +64,11 @@ char timestamp[2][TIMESTAMP_LEN]; struct file_header file_hdr; struct record_header record_hdr; + char comment[MAX_COMMENT_LEN]; + unsigned int id_seq[NR_ACT]; +unsigned int vol_id_seq[NR_ACT]; extern struct activity *act[]; @@ -495,18 +498,20 @@ void setup_file_hdr(int fd) /* OK, now fill the header */ file_hdr.sa_nr_act = get_activity_nr(act, AO_COLLECTED, COUNT_ACTIVITIES); + file_hdr.sa_vol_act_nr = 
get_activity_nr(act, AO_COLLECTED + AO_VOLATILE, + COUNT_ACTIVITIES); file_hdr.sa_day = rectime.tm_mday; file_hdr.sa_month = rectime.tm_mon; file_hdr.sa_year = rectime.tm_year; file_hdr.sa_sizeof_long = sizeof(long); /* - * This is a new file (or stdout): Field last_cpu_nr is set to the number + * This is a new file (or stdout): Field sa_last_cpu_nr is set to the number * of CPU items of the machine (1 .. CPU_NR + 1). * A_CPU activity is always collected, hence its number of items is * always counted (in sa_sys_init()). */ - file_hdr.last_cpu_nr = act[get_activity_position(act, A_CPU)]->nr; + file_hdr.sa_last_cpu_nr = act[get_activity_position(act, A_CPU)]->nr; /* Get system name, release number, hostname and machine architecture */ uname(&header); @@ -545,6 +550,11 @@ void setup_file_hdr(int fd) if ((n = write_all(fd, &file_act, FILE_ACTIVITY_SIZE)) != FILE_ACTIVITY_SIZE) goto write_error; + + /* Create sequence of volatile activities */ + if (IS_VOLATILE(act[p]->options)) { + vol_id_seq[i] = act[p]->id; + } } } @@ -557,6 +567,52 @@ write_error: exit(2); } +/* + *************************************************************************** + * Write the volatile activity structures following each restart mark. + * sa_vol_act_nr structures have to be written. + * Note that volatile activities written after the restart marks may be + * different within the same file if different versions of sysstat have been + * used to create the file and then to append data to it. + * + * IN: + * @ofd Output file descriptor. + *************************************************************************** + */ +void write_vol_act_structures(int ofd) +{ + struct file_activity file_act; + int i, p, n; + + memset(&file_act, 0, FILE_ACTIVITY_SIZE); + + for (i = 0; i < file_hdr.sa_vol_act_nr; i++) { + + if (!vol_id_seq[i]) { + /* + * Write an empty structure when current sysstat + * version knows fewer volatile activities than + * the number saved in file's header. 
+ */ + file_act.id = file_act.nr = 0; + } + else { + p = get_activity_position(act, vol_id_seq[i]); + + /* + * All the fields in file_activity structure are not used. + * In particular, act[p]->nr2 is left unmodified. + */ + file_act.id = act[p]->id; + file_act.nr = act[p]->nr; + } + + if ((n = write_all(ofd, &file_act, FILE_ACTIVITY_SIZE)) != FILE_ACTIVITY_SIZE) { + p_write_error(); + } + } +} + /* *************************************************************************** * sadc called with interval and count parameters not set: @@ -599,10 +655,8 @@ void write_special_record(int ofd, int rtype) } if (rtype == R_RESTART) { - /* Also write current number of CPU */ - if ((n = write_all(ofd, &file_hdr.last_cpu_nr, sizeof(unsigned int))) != sizeof(unsigned int)) { - p_write_error(); - } + /* Also write the volatile activities structures */ + write_vol_act_structures(ofd); } else if (rtype == R_COMMENT) { /* Also write the comment */ @@ -773,7 +827,7 @@ void open_ofile(int *ofd, char ofile[], int restart_mark) void *buffer = NULL; ssize_t sz, n; off_t fpos; - int i, p; + int i, j, p; if (!ofile[0]) return; @@ -813,7 +867,10 @@ void open_ofile(int *ofd, char ofile[], int restart_mark) SREALLOC(buffer, char, file_magic.header_size); - /* Save current file position */ + /* + * Save current file position. + * Needed later to update sa_last_cpu_nr. + */ if ((fpos = lseek(*ofd, 0, SEEK_CUR)) < 0) { perror("lseek"); exit(2); @@ -865,7 +922,7 @@ void open_ofile(int *ofd, char ofile[], int restart_mark) (act[p]->magic != file_act[i].magic)) /* * Unknown activity in list or item size has changed or - * unknown activity format. + * unknown activity format: Cannot append data to such a file. 
*/ goto append_error; @@ -885,26 +942,38 @@ void open_ofile(int *ofd, char ofile[], int restart_mark) id_seq[i] = 0; } + j = 0; + for (i = 0; i < file_hdr.sa_nr_act; i++) { p = get_activity_position(act, file_act[i].id); - if (((act[p]->nr != file_act[i].nr) || (act[p]->nr2 != file_act[i].nr2)) && - !IS_REMANENT(act[p]->options)) { - /* - * Force number of items (serial lines, network interfaces...) - * and sub-items to that of the file (except for remanent activities), - * and reallocate structures. - */ + /* + * Force number of items (serial lines, network interfaces...) + * and sub-items to that of the file, and reallocate structures. + * Exceptions are volatile activities, for which number of items + * is kept unmodified unless its value was zero (in this case, + * it is also forced to the value of the file). + * Also keep in mind that the file cannot contain more than + * sa_vol_act_nr volatile activities. + */ + if (!IS_VOLATILE(act[p]->options) || !act[p]->nr || (j >= file_hdr.sa_vol_act_nr)) { act[p]->nr = file_act[i].nr; - act[p]->nr2 = file_act[i].nr2; - SREALLOC(act[p]->_buf0, void, act[p]->msize * act[p]->nr * act[p]->nr2); } + else { + vol_id_seq[j++] = file_act[i].id; + } + act[p]->nr2 = file_act[i].nr2; + SREALLOC(act[p]->_buf0, void, act[p]->msize * act[p]->nr * act[p]->nr2); /* Save activity sequence */ id_seq[i] = file_act[i].id; act[p]->options |= AO_COLLECTED; } + + while (j < file_hdr.sa_vol_act_nr) { + vol_id_seq[j++] = 0; + } p = get_activity_position(act, A_CPU); if (!IS_COLLECTED(act[p]->options)) { @@ -912,24 +981,24 @@ void open_ofile(int *ofd, char ofile[], int restart_mark) goto append_error; } - if (act[p]->nr != file_hdr.last_cpu_nr) { - if (!restart_mark) { - /* - * Current number of cpu items (for current system) should match - * number of cpu items of the last sample saved in file. - * Yet, this number can be different if we are inserting a restart mark. 
- */ - goto append_error; - } - else { + if (act[p]->nr != file_hdr.sa_last_cpu_nr) { + if (restart_mark) { /* * We are inserting a restart mark, and current machine * has a different number of CPU than that saved in file, - * so update last_cpu_nr in file's header and rewrite it. + * so update sa_last_cpu_nr in file's header and rewrite it. */ - file_hdr.last_cpu_nr = act[p]->nr; + file_hdr.sa_last_cpu_nr = act[p]->nr; rewrite_file_hdr(ofd, fpos, &file_magic); } + else { + /* + * Current number of cpu items (for current system) + * doesn't match number of cpu items of the last sample + * saved in file. + */ + goto append_error; + } } } diff --git a/sar.c b/sar.c index db7509b..8e0cc7f 100644 --- a/sar.c +++ b/sar.c @@ -608,12 +608,16 @@ int sa_read(void *buffer, int size) * @use_tm_end Set to TRUE if option -e has been used. * @rtype Record type to display. * @ifd Input file descriptor. + * @file Name of file being read. + * @file_magic file_magic structure filled with file magic header + * data. * * RETURNS: * 1 if the record has been successfully displayed, and 0 otherwise. *************************************************************************** */ -int sar_print_special(int curr, int use_tm_start, int use_tm_end, int rtype, int ifd) +int sar_print_special(int curr, int use_tm_start, int use_tm_end, int rtype, + int ifd, char *file, struct file_magic *file_magic) { char cur_time[26]; int dp = 1; @@ -629,8 +633,9 @@ int sar_print_special(int curr, int use_tm_start, int use_tm_end, int rtype, int } if (rtype == R_RESTART) { - /* Don't forget to read new number of CPU */ - new_cpu_nr = read_new_cpu_nr(ifd, act); + /* Don't forget to read the volatile activities structures */ + new_cpu_nr = read_vol_act_structures(ifd, act, file, file_magic, + file_hdr.sa_vol_act_nr); if (dp) { printf("\n%-11s LINUX RESTART\t(%d CPU)\n", @@ -696,6 +701,8 @@ void read_sadc_stat_bunch(int curr) * @rows Number of rows of screen. * @act_id Activity to display. 
* @file_actlst List of activities in file. + * @file Name of file being read. + * @file_magic file_magic structure filled with file magic header data. * * OUT: * @curr Index in array for next sample statistics. @@ -707,7 +714,8 @@ void read_sadc_stat_bunch(int curr) */ void handle_curr_act_stats(int ifd, off_t fpos, int *curr, long *cnt, int *eosaf, int rows, unsigned int act_id, int *reset, - struct file_activity *file_actlst) + struct file_activity *file_actlst, char *file, + struct file_magic *file_magic) { int p; unsigned long lines = 0; @@ -765,7 +773,7 @@ void handle_curr_act_stats(int ifd, off_t fpos, int *curr, long *cnt, int *eosaf if (rtype == R_COMMENT) { /* Display comment */ next = sar_print_special(*curr, tm_start.use, tm_end.use, - R_COMMENT, ifd); + R_COMMENT, ifd, file, file_magic); if (next) { /* A line of comment was actually displayed */ lines++; @@ -920,7 +928,8 @@ void read_stats_from_file(char from_file[]) rtype = record_hdr[0].record_type; if ((rtype == R_RESTART) || (rtype == R_COMMENT)) { - sar_print_special(0, tm_start.use, tm_end.use, rtype, ifd); + sar_print_special(0, tm_start.use, tm_end.use, rtype, + ifd, from_file, &file_magic); } else { /* @@ -970,7 +979,8 @@ void read_stats_from_file(char from_file[]) if (!HAS_MULTIPLE_OUTPUTS(act[p]->options)) { handle_curr_act_stats(ifd, fpos, &curr, &cnt, &eosaf, rows, - act[p]->id, &reset, file_actlst); + act[p]->id, &reset, file_actlst, + from_file, &file_magic); } else { unsigned int optf, msk; @@ -983,7 +993,8 @@ void read_stats_from_file(char from_file[]) handle_curr_act_stats(ifd, fpos, &curr, &cnt, &eosaf, rows, act[p]->id, - &reset, file_actlst); + &reset, file_actlst, + from_file, &file_magic); act[p]->opt_flags = optf; } } @@ -1002,7 +1013,8 @@ void read_stats_from_file(char from_file[]) } else if (!eosaf && (rtype == R_COMMENT)) { /* This was a COMMENT record: print it */ - sar_print_special(curr, tm_start.use, tm_end.use, R_COMMENT, ifd); + sar_print_special(curr, tm_start.use, 
tm_end.use, R_COMMENT, + ifd, from_file, &file_magic); } } while (!eosaf && (rtype != R_RESTART)); @@ -1010,7 +1022,8 @@ void read_stats_from_file(char from_file[]) /* The last record we read was a RESTART one: Print it */ if (!eosaf && (record_hdr[curr].record_type == R_RESTART)) { - sar_print_special(curr, tm_start.use, tm_end.use, R_RESTART, ifd); + sar_print_special(curr, tm_start.use, tm_end.use, R_RESTART, + ifd, from_file, &file_magic); } } while (!eosaf); -- 2.40.0