granicus.if.org Git - ipset/commitdiff
netfilter: ipset: fix race condition in ipset save, swap and delete
author Vishwanath Pai <vpai@akamai.com>
Wed, 16 Mar 2016 08:03:30 +0000 (09:03 +0100)
committer Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
Wed, 16 Mar 2016 08:03:30 +0000 (09:03 +0100)
This fix adds a new reference counter (ref_netlink) for the struct ip_set.
The other reference counter (ref) can be swapped out by ip_set_swap and we
need a separate counter to keep track of references for netlink events
like dump. Using the same ref counter for dump causes a race condition
which can be demonstrated by the following script:

ipset create hash_ip1 hash:ip family inet hashsize 1024 maxelem 500000 \
counters
ipset create hash_ip2 hash:ip family inet hashsize 300000 maxelem 500000 \
counters
ipset create hash_ip3 hash:ip family inet hashsize 1024 maxelem 500000 \
counters

ipset save &

ipset swap hash_ip3 hash_ip2
ipset destroy hash_ip3 /* will crash the machine */

Swap will exchange the values of ref, so destroy will see ref = 0 instead of
ref = 1. With this fix in place, swap will not succeed because ipset save
still holds a ref_netlink reference on the set (ip_set_swap doesn't swap
ref_netlink).

Both delete and swap will error out if ref_netlink != 0 on the set.

Note: The *_head functions are changed because previously we would
increment ref whenever we called these functions; we don't do that
anymore.

Reviewed-by: Joshua Hunt <johunt@akamai.com>
Signed-off-by: Vishwanath Pai <vpai@akamai.com>
Signed-off-by: Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
kernel/include/linux/netfilter/ipset/ip_set.h
kernel/net/netfilter/ipset/ip_set_bitmap_gen.h
kernel/net/netfilter/ipset/ip_set_core.c
kernel/net/netfilter/ipset/ip_set_hash_gen.h
kernel/net/netfilter/ipset/ip_set_list_set.c

index eaf5057a8e83ccf3e884713dab9d1162dbbcb80e..3e44e66e9d4cc4a6bab6366dbadd7a0c13c32a98 100644 (file)
@@ -235,6 +235,10 @@ struct ip_set {
        spinlock_t lock;
        /* References to the set */
        u32 ref;
+       /* References to the set for netlink events like dump,
+        * ref can be swapped out by ip_set_swap
+        */
+       u32 ref_netlink;
        /* The core set type */
        struct ip_set_type *type;
        /* The type variant doing the real job */
index 0c1ba2e221dab7030a2791b27d97c90378029c8a..211dc663fcd2a76a21d0f508120f64d225f4f18e 100644 (file)
@@ -105,7 +105,7 @@ mtype_head(struct ip_set *set, struct sk_buff *skb)
        if (!nested)
                goto nla_put_failure;
        if (mtype_do_head(skb, map) ||
-           nla_put_net32(skb, IPSET_ATTR_REFERENCES, htonl(set->ref - 1)) ||
+           nla_put_net32(skb, IPSET_ATTR_REFERENCES, htonl(set->ref)) ||
            nla_put_net32(skb, IPSET_ATTR_MEMSIZE, htonl(memsize)) ||
            nla_put_net32(skb, IPSET_ATTR_ELEMENTS, htonl(set->elements)))
                goto nla_put_failure;
index d34dfef94209dfbff313b261fd093b85ec10c159..0be88467d60a5d81c6653a2f536daba4c2d4f3a2 100644 (file)
@@ -502,6 +502,26 @@ __ip_set_put(struct ip_set *set)
        write_unlock_bh(&ip_set_ref_lock);
 }
 
+/* set->ref can be swapped out by ip_set_swap, netlink events (like dump) need
+ * a separate reference counter
+ */
+static inline void
+__ip_set_get_netlink(struct ip_set *set)
+{
+       write_lock_bh(&ip_set_ref_lock);
+       set->ref_netlink++;
+       write_unlock_bh(&ip_set_ref_lock);
+}
+
+static inline void
+__ip_set_put_netlink(struct ip_set *set)
+{
+       write_lock_bh(&ip_set_ref_lock);
+       BUG_ON(set->ref_netlink == 0);
+       set->ref_netlink--;
+       write_unlock_bh(&ip_set_ref_lock);
+}
+
 /* Add, del and test set entries from kernel.
  *
  * The set behind the index must exist and must be referenced
@@ -1014,7 +1034,7 @@ IPSET_CBFN(ip_set_destroy, struct net *net, struct sock *ctnl,
        if (!attr[IPSET_ATTR_SETNAME]) {
                for (i = 0; i < inst->ip_set_max; i++) {
                        s = ip_set(inst, i);
-                       if (s && s->ref) {
+                       if (s && (s->ref || s->ref_netlink)) {
                                ret = -IPSET_ERR_BUSY;
                                goto out;
                        }
@@ -1036,7 +1056,7 @@ IPSET_CBFN(ip_set_destroy, struct net *net, struct sock *ctnl,
                if (!s) {
                        ret = -ENOENT;
                        goto out;
-               } else if (s->ref) {
+               } else if (s->ref || s->ref_netlink) {
                        ret = -IPSET_ERR_BUSY;
                        goto out;
                }
@@ -1186,6 +1206,9 @@ IPSET_CBFN(ip_set_swap, struct net *net, struct sock *ctnl,
              from->family == to->family))
                return -IPSET_ERR_TYPE_MISMATCH;
 
+       if (from->ref_netlink || to->ref_netlink)
+               return -EBUSY;
+
        strncpy(from_name, from->name, IPSET_MAXNAMELEN);
        strncpy(from->name, to->name, IPSET_MAXNAMELEN);
        strncpy(to->name, from_name, IPSET_MAXNAMELEN);
@@ -1221,7 +1244,7 @@ ip_set_dump_done(struct netlink_callback *cb)
                if (set->variant->uref)
                        set->variant->uref(set, cb, false);
                pr_debug("release set %s\n", set->name);
-               __ip_set_put_byindex(inst, index);
+               __ip_set_put_netlink(set);
        }
        return 0;
 }
@@ -1343,7 +1366,7 @@ dump_last:
                if (!cb->args[IPSET_CB_ARG0]) {
                        /* Start listing: make sure set won't be destroyed */
                        pr_debug("reference set\n");
-                       set->ref++;
+                       set->ref_netlink++;
                }
                write_unlock_bh(&ip_set_ref_lock);
                nlh = start_msg(skb, NETLINK_PORTID(cb->skb),
@@ -1411,7 +1434,7 @@ release_refcount:
                if (set->variant->uref)
                        set->variant->uref(set, cb, false);
                pr_debug("release set %s\n", set->name);
-               __ip_set_put_byindex(inst, index);
+               __ip_set_put_netlink(set);
                cb->args[IPSET_CB_ARG0] = 0;
        }
 out:
index 75c6d225119eab313b80555a491cbf6429097be1..9128a220c41a38746957141877cc1ca5211c54e7 100644 (file)
@@ -1075,7 +1075,7 @@ mtype_head(struct ip_set *set, struct sk_buff *skb)
        if (nla_put_u32(skb, IPSET_ATTR_MARKMASK, h->markmask))
                goto nla_put_failure;
 #endif
-       if (nla_put_net32(skb, IPSET_ATTR_REFERENCES, htonl(set->ref - 1)) ||
+       if (nla_put_net32(skb, IPSET_ATTR_REFERENCES, htonl(set->ref)) ||
            nla_put_net32(skb, IPSET_ATTR_MEMSIZE, htonl(memsize)) ||
            nla_put_net32(skb, IPSET_ATTR_ELEMENTS, htonl(set->elements)))
                goto nla_put_failure;
index 4fb9186d8aef8d8b59f5bc3414848f0a80d194d7..dede343a662b2049b442c15745faba5a0e697d5b 100644 (file)
@@ -474,7 +474,7 @@ list_set_head(struct ip_set *set, struct sk_buff *skb)
        if (!nested)
                goto nla_put_failure;
        if (nla_put_net32(skb, IPSET_ATTR_SIZE, htonl(map->size)) ||
-           nla_put_net32(skb, IPSET_ATTR_REFERENCES, htonl(set->ref - 1)) ||
+           nla_put_net32(skb, IPSET_ATTR_REFERENCES, htonl(set->ref)) ||
            nla_put_net32(skb, IPSET_ATTR_MEMSIZE, htonl(memsize)) ||
            nla_put_net32(skb, IPSET_ATTR_ELEMENTS, htonl(set->elements)))
                goto nla_put_failure;