]> granicus.if.org Git - libevent/commit
[core] re-order fields in struct event for memory efficiency
authorNathan French <nate@cl0d.com>
Mon, 30 Apr 2018 22:13:45 +0000 (18:13 -0400)
committerNathan French <nate@cl0d.com>
Mon, 30 Apr 2018 22:13:45 +0000 (18:13 -0400)
commit1af8017648d63d461737805e2607174c38708950
treeecd6a94ccf635fb77f3bcd720976ae484a40dc15
parenta5f19422fdc91cf1e645fc318a174f33f132b9ac
[core] re-order fields in struct event for memory efficiency

The size of `struct event` can be reduced on both 32-bit and 64-bit systems
by moving the 4 bytes that make up `ev_events` and `ev_res` to just below `ev_fd`,
before `struct event_base * ev_base;`. The compiler will not reorder struct
members on its own, so with the current layout it pads twice where a single
pad would be enough.

```C
struct event {
                                                         /*  OFFS |  SZ   Bytes  | Total Bytes | START - END  */
    struct event_callback ev_evcallback;                 /*   0x0 |  40          | 40          | 0x0   - 0x28 */

    union {                                              /*  0x28 |  ----------- | ----------- | ------------ */
        TAILQ_ENTRY(event) ev_next_with_common_timeout;  /*       |       ((16)) |             |              */
        int min_heap_idx;                                /*       |       ((04)) |             |              */
    }   ev_timeout_pos;                                  /*       |  16          | 56          | 0x28  - 0x38 */
    int ev_fd;                                           /*  0x38 |  04          | 60          | 0x38  - 0x3c */
```

Since the next field is 8 bytes in length, and we are up to 60 bytes, `ev_fd` ends up being padded (4 more bytes on 64b).

```C
                              /* --- 1 byte gap HERE --->            1           | <61>                      */
                              /* --- 1 byte gap HERE --->            1           | <62>                      */
                              /* --- 1 byte gap HERE --->            1           | <63>                      */
                              /* --- 1 byte gap HERE --->            1           | <64>                      */
    struct event_base * ev_base;                         /*  0x40 |  8           | 72         | 0x40 - 0x48  */
    union {                                              /*  0x48 | ------------ | ---------- | ------------ */
        struct {                                         /*       | ------------ |            |              */
            LIST_ENTRY (event) ev_io_next;               /*       |       ((16+  |            |              */
            struct timeval ev_timeout;                   /*       |         16)) |            |              */
        } ev_io;                                         /*       | ((32))       |            |              */
        struct {                                         /*       | ------------ |            |              */
            LIST_ENTRY (event) ev_signal_next;           /*       |      ((16+   |            |              */
            short   ev_ncalls;                           /*       |        02+   |            |              */
            short * ev_pncalls;                          /*       |        08))  |            |              */
        } ev_signal;                                     /*       | ((26))       |            |              */
    } ev_;                                               /*       | 32           | 104        | 0x48 - 0x68  */

    short ev_events;                                     /*  0x68 | 2            | 106        | 0x68 - 0x6a  */
    short ev_res;                                        /*  0x6a | 2            | 108        | 0x6a - 0x6c  */
```

We now hit another line, `struct timeval` is 16 bytes on 64b arch, so we have 4 more bytes
of padding on `ev_res`.

```C
    /* --- 1 byte gap HERE --- */
    /* --- 1 byte gap HERE --- */
    /* --- 1 byte gap HERE --- */
    /* --- 1 byte gap HERE --- */
    struct timeval ev_timeout;                           /*  0x70 | 16           | 128        | 0x70 - 0x80  */
};
```

After moving `ev_events` and `ev_res` below `ev_fd` we have something
a bit more optimal:

```C
struct event2 {
                                                         /*  OFFS |  SZ / Bytes  | RSUM Bytes | START - END  */
    struct event_callback ev_evcallback;                 /*   0x0 | 40           | 40         | 0x0   - 0x28 */
    union {                                              /*  0x28 | ------------ | ---------- | ------------ */
        TAILQ_ENTRY(event) ev_next_with_common_timeout;  /*       |       ((16)) |            |              */
        int min_heap_idx;                                /*       |       ((04)) |            |              */
    } ev_timeout_pos;                                    /*       | 16           | 56         | 0x28  - 0x38 */
    int ev_fd;                                           /*  0x38 | 4            | 60         | 0x38  - 0x3c */
    short ev_events;                                     /*  0x3c | 2            | 62         | 0x3c  - 0x3e */
    short ev_res;                                        /*  0x3e | 2            | 64         | 0x3e  - 0x40 */

    struct event_base * ev_base;                         /*  0x40 | 8            | 72         | 0x40  - 0x48 */
    union {                                              /*  0x48 | ------------ | ---------- | ------------ */
        struct {                                         /*       | ------------ |            |              */
            LIST_ENTRY (event) ev_io_next;               /*       |        ((16+ |            |              */
            struct timeval ev_timeout;                   /*       |         16)) |            |              */
        } ev_io;                                         /*       | ((32))       |            |              */
        struct {                                         /*       | ------------ |            |              */
            LIST_ENTRY (event) ev_signal_next;           /*       |       ((16+  |            |              */
            short   ev_ncalls;                           /*       |         02+  |            |              */
            short * ev_pncalls;                          /*       |         08)) |            |              */
        } ev_signal;                                     /*       | ((26))       |            |              */
    } ev_;                                               /*       | 32           | 104        | 0x48 - 0x68  */
    struct timeval ev_timeout;                           /* 0x68  | 16           | 120        | 0x68 - 0x78  */
};
```

We still have a gap here, but the first was removed.

Again, we can save 8 bytes on both 32 and 64 word sizes (32/64 byte cacheline).

Below are the results for testing v2.1.6 -> master -> master + this patch (Release/-O3)

Code:

```C
  #include <event2/event.h>

  int
  main(int argc, char ** argv) {
      printf("%zu\n", event_get_struct_event_size());
      return 0;
  }
```

Branch: `master` (2.2.x)

```
  $ gcc -O3 -Wall -Wl,-R/usr/local/lib bleh.c -L/usr/local/lib -o bleh  -levent
  $ ldd bleh
      linux-vdso.so.1 =>  (0x00007ffc3df50000)
      libevent.so.2.2.0 => /usr/local/lib/libevent.so.2.2.0 (0x00007f91fd781000)
      libc.so.6 => /lib/x86_64-linux-gnu/libc.so.6 (0x00007f91fd3a1000)
      libpthread.so.0 => /lib/x86_64-linux-gnu/libpthread.so.0 (0x00007f91fd182000)
      /lib64/ld-linux-x86-64.so.2 (0x00007f91fdbcc000)
  $ ./bleh
  128
```

Release: `2.1.6`

```
  $ gcc -O3 bleh.c -o bleh  -levent
  $ ldd bleh
      linux-vdso.so.1 =>  (0x00007ffd43773000)
      libevent-2.1.so.6 => /usr/lib/x86_64-linux-gnu/libevent-2.1.so.6 (0x00007feb3add6000)
      libc.so.6 => /lib/x86_64-linux-gnu/libc.so.6 (0x00007feb3a9f6000)
      libpthread.so.0 => /lib/x86_64-linux-gnu/libpthread.so.0 (0x00007feb3a7d7000)
      /lib64/ld-linux-x86-64.so.2 (0x00007feb3b22a000)
  $ ./bleh
  128
```

Branch: `this one`

```
  $ gcc -O3 -Wl,-R./lib bleh.c -o bleh -L./lib -levent
  $ ldd bleh
      linux-vdso.so.1 =>  (0x00007ffff55f7000)
      libevent.so.2.2.0 => ./lib/libevent.so.2.2.0 (0x00007ff8e5c82000)
      libc.so.6 => /lib/x86_64-linux-gnu/libc.so.6 (0x00007ff8e58a2000)
      libpthread.so.0 => /lib/x86_64-linux-gnu/libpthread.so.0 (0x00007ff8e5683000)
      /lib64/ld-linux-x86-64.so.2 (0x00007ff8e60cd000)
  $ ./bleh
  120
```
include/event2/event_struct.h