]> granicus.if.org Git - ipset/commitdiff
Eighth stage to ipset-5
authorJozsef Kadlecsik <kadlec@blackhole.kfki.hu>
Thu, 22 Apr 2010 15:09:18 +0000 (17:09 +0200)
committerJozsef Kadlecsik <kadlec@blackhole.kfki.hu>
Thu, 22 Apr 2010 15:09:18 +0000 (17:09 +0200)
Commit changed files in kernel/...

14 files changed:
kernel/ChangeLog
kernel/Kbuild
kernel/Kconfig.ipset
kernel/Makefile
kernel/include/linux/netfilter/ip_set.h
kernel/include/linux/netfilter/ip_set_bitmap.h
kernel/include/linux/netfilter/ip_set_getport.h
kernel/include/linux/netfilter/ip_set_hash.h
kernel/include/linux/netfilter/ip_set_jhash.h
kernel/ip_set.c
kernel/ip_set_bitmap_ip.c
kernel/ip_set_bitmap_ipmac.c
kernel/ip_set_bitmap_port.c
kernel/ip_set_hash_ip.c

index 1ad54cdca6e0bd7d6f1337e8aa3203fa87e9be0c..b587dc86bd088850bc8f9d5adf882b0c786fd1c1 100644 (file)
@@ -1,3 +1,6 @@
+5.0
+ - New main branch - ipset completely rewritten
+
 4.2
   - nethash and ipportnethash types counted every entry twice
     which could produce bogus entries when listing/saving these types
index 9757a4a21f6c902fde22f070d1d498427fb6f54c..c17171166bf770ceebec51f2b8511a50eca3c16c 100644 (file)
@@ -1,14 +1,15 @@
 EXTRA_CFLAGS := -I$(M)/include \
-       -DCONFIG_IP_NF_SET_MAX=$(IP_NF_SET_MAX) \
-       -DCONFIG_IP_NF_SET_HASHSIZE=$(IP_NF_SET_HASHSIZE)
+       -DCONFIG_IP_SET_MAX=$(IP_SET_MAX)
 
-obj-m += ip_set.o ipt_set.o ipt_SET.o
-obj-m += ip_set_ipmap.o ip_set_macipmap.o ip_set_portmap.o
-obj-m += ip_set_iphash.o ip_set_nethash.o ip_set_ipporthash.o
-obj-m += ip_set_ipportiphash.o ip_set_ipportnethash.o
-obj-m += ip_set_iptree.o ip_set_iptreemap.o
-obj-m += ip_set_setlist.o
+obj-m += ip_set.o
+#ipt_set.o ipt_SET.o
+obj-m += ip_set_bitmap_ip.o ip_set_bitmap_ipmac.o ip_set_bitmap_port.o
+obj-m += ip_set_hash_ip.o
+#obj-m += ip_set_iphash.o ip_set_nethash.o ip_set_ipporthash.o
+#obj-m += ip_set_ipportiphash.o ip_set_ipportnethash.o
+#obj-m += ip_set_iptree.o ip_set_iptreemap.o
+#obj-m += ip_set_setlist.o
 
 # It's for me...
-incdirs := $(M) $(M)/include/linux/netfilter_ipv4
-clean-files := $(foreach dir,$(incdirs),$(wildcard $(dir)/*~)) *.m.c
+incdirs := $(M) $(M)/include/linux/netfilter
+clean-files := $(foreach dir,$(incdirs),$(wildcard $(dir)/*~))
index 8b27517fae5a8aad7f9fe5aaef17d3bf66cee3bc..7f7a34a306cffbded3cbb8a06d2b6e97c1afb7af 100644 (file)
@@ -1,4 +1,4 @@
-config IP_NF_SET
+config IP_SET
        tristate "IP set support"
        depends on INET && NETFILTER
        help
@@ -8,11 +8,11 @@ config IP_NF_SET
 
          To compile it as a module, choose M here.  If unsure, say N.
 
-config IP_NF_SET_MAX
+config IP_SET_MAX
        int "Maximum number of IP sets"
        default 256
        range 2 65534
-       depends on IP_NF_SET
+       depends on IP_SET
        help
          You can define here the default value of the maximum number
          of IP sets for the kernel.
@@ -20,117 +20,106 @@ config IP_NF_SET_MAX
          The value can be overridden by the 'max_sets' module
          parameter of the 'ip_set' module.
 
-config IP_NF_SET_HASHSIZE
-       int "Hash size for bindings of IP sets"
-       default 1024
-       depends on IP_NF_SET
-       help
-         You can define here default value of the hash size for
-         bindings of IP sets.
-
-         The value can be overriden by the 'hash_size' module
-         parameter of the 'ip_set' module.
-
-config IP_NF_SET_IPMAP
+config IP_SET_IPMAP
        tristate "ipmap set support"
-       depends on IP_NF_SET
+       depends on IP_SET
        help
          This option adds the ipmap set type support.
 
          To compile it as a module, choose M here.  If unsure, say N.
 
-config IP_NF_SET_MACIPMAP
+config IP_SET_MACIPMAP
        tristate "macipmap set support"
-       depends on IP_NF_SET
+       depends on IP_SET
        help
          This option adds the macipmap set type support.
 
          To compile it as a module, choose M here.  If unsure, say N.
 
-config IP_NF_SET_PORTMAP
+config IP_SET_PORTMAP
        tristate "portmap set support"
-       depends on IP_NF_SET
+       depends on IP_SET
        help
          This option adds the portmap set type support.
 
          To compile it as a module, choose M here.  If unsure, say N.
 
-config IP_NF_SET_IPHASH
+config IP_SET_IPHASH
        tristate "iphash set support"
-       depends on IP_NF_SET
+       depends on IP_SET
        help
          This option adds the iphash set type support.
 
          To compile it as a module, choose M here.  If unsure, say N.
 
-config IP_NF_SET_NETHASH
+config IP_SET_NETHASH
        tristate "nethash set support"
-       depends on IP_NF_SET
+       depends on IP_SET
        help
          This option adds the nethash set type support.
 
          To compile it as a module, choose M here.  If unsure, say N.
 
-config IP_NF_SET_IPPORTHASH
+config IP_SET_IPPORTHASH
        tristate "ipporthash set support"
-       depends on IP_NF_SET
+       depends on IP_SET
        help
          This option adds the ipporthash set type support.
 
          To compile it as a module, choose M here.  If unsure, say N.
 
-config IP_NF_SET_IPPORTIPHASH
+config IP_SET_IPPORTIPHASH
        tristate "ipportiphash set support"
-       depends on IP_NF_SET
+       depends on IP_SET
        help
          This option adds the ipportiphash set type support.
 
          To compile it as a module, choose M here.  If unsure, say N.
 
-config IP_NF_SET_IPPORTNETHASH
+config IP_SET_IPPORTNETHASH
        tristate "ipportnethash set support"
-       depends on IP_NF_SET
+       depends on IP_SET
        help
          This option adds the ipportnethash set type support.
 
          To compile it as a module, choose M here.  If unsure, say N.
 
-config IP_NF_SET_IPTREE
+config IP_SET_IPTREE
        tristate "iptree set support"
-       depends on IP_NF_SET
+       depends on IP_SET
        help
          This option adds the iptree set type support.
 
          To compile it as a module, choose M here.  If unsure, say N.
 
-config IP_NF_SET_IPTREEMAP
+config IP_SET_IPTREEMAP
        tristate "iptreemap set support"
-       depends on IP_NF_SET
+       depends on IP_SET
        help
          This option adds the iptreemap set type support.
 
          To compile it as a module, choose M here.  If unsure, say N.
 
-config IP_NF_SET_SETLIST
+config IP_SET_SETLIST
        tristate "setlist set support"
-       depends on IP_NF_SET
+       depends on IP_SET
        help
          This option adds the setlist set type support.
 
          To compile it as a module, choose M here.  If unsure, say N.
 
-config IP_NF_MATCH_SET
+config IP_MATCH_SET
        tristate "set match support"
-       depends on IP_NF_SET
+       depends on IP_SET
        help
          Set matching matches against given IP sets.
          You need the ipset utility to create and set up the sets.
 
          To compile it as a module, choose M here.  If unsure, say N.
 
-config IP_NF_TARGET_SET
+config IP_TARGET_SET
        tristate "SET target support"
-       depends on IP_NF_SET
+       depends on IP_SET
        help
          The SET target makes it possible to add/delete entries
          in IP sets.
index 9ec91f6f235c75a75545d689c920fce583b6d249..ca85e88d7f4fb26c2549e59841a9a63d9aec8dbf 100644 (file)
@@ -3,14 +3,5 @@ include Kbuild
 else
 KERNELDIR := /lib/modules/`uname -r`/build
 all::
-       $(MAKE) -C $KERNELDIR M=`pwd` $@
-
-expand_macros: $(patsubst %.c, %.m.c, $(filter-out %.mod.c %.m.c, $(wildcard ip_set_*.c)))
-
-%.m.c: %.c
-       ./expand_macros.pl < $< > $@
-
-clean:
-       rm -rf *.m.c
-
+       $(MAKE) -C $(KERNELDIR) M=`pwd` $@
 endif
index da173195104aa9093722237dcd4bac54a9752d97..d0b47a0a620be5f5a1539b9419318981b6050383 100644 (file)
 /* Copyright (C) 2000-2002 Joakim Axelsson <gozem@linux.nu>
  *                         Patrick Schaaf <bof@bof.de>
  *                         Martin Josefsson <gandalf@wlug.westbo.se>
- * Copyright (C) 2003-2004 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
+ * Copyright (C) 2003-2010 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
  * published by the Free Software Foundation.  
  */
 
-#if 0
+#if 1
 #define IP_SET_DEBUG
 #endif
 
-/*
- * A sockopt of such quality has hardly ever been seen before on the open
- * market!  This little beauty, hardly ever used: above 64, so it's
- * traditionally used for firewalling, not touched (even once!) by the
- * 2.0, 2.2 and 2.4 kernels!
- *
- * Comes with its own certificate of authenticity, valid anywhere in the
- * Free world!
- *
- * Rusty, 19.4.2000
- */
-#define SO_IP_SET              83
-
-/*
- * Heavily modify by Joakim Axelsson 08.03.2002
- * - Made it more modulebased
- *
- * Additional heavy modifications by Jozsef Kadlecsik 22.02.2004
- * - bindings added
- * - in order to "deal with" backward compatibility, renamed to ipset
- */
-
-/* 
- * Used so that the kernel module and ipset-binary can match their versions 
- */
-#define IP_SET_PROTOCOL_UNALIGNED      3
-#define IP_SET_PROTOCOL_VERSION                4
-
-#define IP_SET_MAXNAMELEN 32   /* set names and set typenames */
-
-/* Lets work with our own typedef for representing an IP address.
- * We hope to make the code more portable, possibly to IPv6...
- *
- * The representation works in HOST byte order, because most set types
- * will perform arithmetic operations and compare operations.
- * 
- * For now the type is an uint32_t.
- *
- * Make sure to ONLY use the functions when translating and parsing
- * in order to keep the host byte order and make it more portable:
- *  parse_ip()
- *  parse_mask()
- *  parse_ipandmask()
- *  ip_tostring()
- * (Joakim: where are they???)
- */
-
-typedef uint32_t ip_set_ip_t;
-
-/* Sets are identified by an id in kernel space. Tweak with ip_set_id_t
- * and IP_SET_INVALID_ID if you want to increase the max number of sets.
- */
-typedef uint16_t ip_set_id_t;
-
-#define IP_SET_INVALID_ID      65535
-
-/* How deep we follow bindings */
-#define IP_SET_MAX_BINDINGS    6
-
-/*
- * Option flags for kernel operations (ipt_set_info)
- */
-#define IPSET_SRC              0x01    /* Source match/add */
-#define IPSET_DST              0x02    /* Destination match/add */
-#define IPSET_MATCH_INV                0x04    /* Inverse matching */
-
-/*
- * Set features
- */
-#define IPSET_TYPE_IP          0x01    /* IP address type of set */
-#define IPSET_TYPE_PORT                0x02    /* Port type of set */
-#define IPSET_DATA_SINGLE      0x04    /* Single data storage */
-#define IPSET_DATA_DOUBLE      0x08    /* Double data storage */
-#define IPSET_DATA_TRIPLE      0x10    /* Triple data storage */
-#define IPSET_TYPE_IP1         0x20    /* IP address type of set */
-#define IPSET_TYPE_SETNAME     0x40    /* setname type of set */
-
-/* Reserved keywords */
-#define IPSET_TOKEN_DEFAULT    ":default:"
-#define IPSET_TOKEN_ALL                ":all:"
-
-/* SO_IP_SET operation constants, and their request struct types.
- *
- * Operation ids:
- *       0-99:  commands with version checking
- *     100-199: add/del/test/bind/unbind
- *     200-299: list, save, restore
- */
-
-/* Single shot operations: 
- * version, create, destroy, flush, rename and swap 
- *
- * Sets are identified by name.
- */
-
-#define IP_SET_REQ_STD         \
-       unsigned op;            \
-       unsigned version;       \
-       char name[IP_SET_MAXNAMELEN]
-
-#define IP_SET_OP_CREATE       0x00000001      /* Create a new (empty) set */
-struct ip_set_req_create {
-       IP_SET_REQ_STD;
-       char typename[IP_SET_MAXNAMELEN];
+/* The protocol version */
+#define IPSET_PROTOCOL         5
+
+/* The max length of strings: set and type identifiers */
+#define IPSET_MAXNAMELEN       32
+
+/* Message types and commands */
+enum ipset_cmd {
+       IPSET_CMD_NONE,
+       IPSET_CMD_CREATE,       /* Create a new (empty) set */
+       IPSET_CMD_DESTROY,      /* Remove a (empty) set */
+       IPSET_CMD_FLUSH,        /* Remove all elements from a set */
+       IPSET_CMD_RENAME,       /* Rename a set */
+       IPSET_CMD_SWAP,         /* Swap two sets */
+       IPSET_CMD_LIST,         /* List sets */
+       IPSET_CMD_SAVE,         /* Save sets */
+       IPSET_CMD_ADD,          /* Add an element to a set */
+       IPSET_CMD_DEL,          /* Delete an element from a set */
+       IPSET_CMD_TEST,         /* Test an element in a set */
+       IPSET_CMD_HEADER,       /* Get set header data only */
+       IPSET_CMD_TYPE,         /* Get set type */
+       IPSET_CMD_PROTOCOL,     /* Return protocol version */
+       IPSET_MSG_MAX,          /* Netlink message commands */
+
+       /* Commands in userspace: */
+       IPSET_CMD_RESTORE = IPSET_MSG_MAX, /* Enter restore mode */     
+       IPSET_CMD_HELP,         /* Get help */
+       IPSET_CMD_VERSION,      /* Get program version */
+
+       IPSET_CMD_MAX,
+
+       IPSET_CMD_COMMIT = IPSET_CMD_MAX, /* Commit buffered commands */
 };
 
-#define IP_SET_OP_DESTROY      0x00000002      /* Remove a (empty) set */
-struct ip_set_req_std {
-       IP_SET_REQ_STD;
+/* Attributes at command level */
+enum {
+       IPSET_ATTR_UNSPEC,
+       IPSET_ATTR_PROTOCOL,    /* Protocol version */
+       IPSET_ATTR_SETNAME,     /* Name of the set */
+       IPSET_ATTR_TYPENAME,    /* Typename */
+       IPSET_ATTR_SETNAME2 = IPSET_ATTR_TYPENAME, /* rename/swap */
+       IPSET_ATTR_REVISION,    /* Settype revision */
+       IPSET_ATTR_FAMILY,      /* Settype family */
+       IPSET_ATTR_DATA,        /* Nested attributes */
+       IPSET_ATTR_ADT,         /* Multiple data containers */
+       IPSET_ATTR_LINENO,      /* Restore lineno */
+       IPSET_ATTR_PROTOCOL_MIN,/* Minimal supported version number */
+       IPSET_ATTR_REVISION_MIN = IPSET_ATTR_PROTOCOL_MIN, /* type rev min */
+       __IPSET_ATTR_CMD_MAX,
 };
-
-#define IP_SET_OP_FLUSH                0x00000003      /* Remove all IPs in a set */
-/* Uses ip_set_req_std */
-
-#define IP_SET_OP_RENAME       0x00000004      /* Rename a set */
-/* Uses ip_set_req_create */
-
-#define IP_SET_OP_SWAP         0x00000005      /* Swap two sets */
-/* Uses ip_set_req_create */
-
-union ip_set_name_index {
-       char name[IP_SET_MAXNAMELEN];
-       ip_set_id_t index;
+#define IPSET_ATTR_CMD_MAX     (__IPSET_ATTR_CMD_MAX - 1)
+
+/* CADT specific attributes */
+enum {
+       IPSET_ATTR_IP = IPSET_ATTR_UNSPEC + 1,
+       IPSET_ATTR_IP_FROM = IPSET_ATTR_IP,
+       IPSET_ATTR_IP_TO,
+       IPSET_ATTR_CIDR,
+       IPSET_ATTR_PORT,
+       IPSET_ATTR_PORT_FROM = IPSET_ATTR_PORT,
+       IPSET_ATTR_PORT_TO,
+       IPSET_ATTR_TIMEOUT,
+       IPSET_ATTR_FLAGS,
+       /* IPSET_ATTR_LINENO */
+       /* Reserve empty slots */
+       IPSET_ATTR_CADT_MAX = 16,
+       /* Create-only specific attributes */
+       IPSET_ATTR_GC,
+       IPSET_ATTR_HASHSIZE,
+       IPSET_ATTR_MAXELEM,
+       IPSET_ATTR_NETMASK,
+       IPSET_ATTR_PROBES,
+       IPSET_ATTR_RESIZE,
+       IPSET_ATTR_SIZE,
+       /* Kernel-only */
+       IPSET_ATTR_ELEMENTS,
+       IPSET_ATTR_REFERENCES,
+       IPSET_ATTR_MEMSIZE,
+       
+       __IPSET_ATTR_CREATE_MAX,
 };
-
-#define IP_SET_OP_GET_BYNAME   0x00000006      /* Get set index by name */
-struct ip_set_req_get_set {
-       unsigned op;
-       unsigned version;
-       union ip_set_name_index set;
+#define IPSET_ATTR_CREATE_MAX  (__IPSET_ATTR_CREATE_MAX - 1)
+
+/* ADT specific attributes */
+enum {
+       IPSET_ATTR_ETHER = IPSET_ATTR_CADT_MAX + 1,
+       IPSET_ATTR_NAME,
+       IPSET_ATTR_NAMEREF,
+       IPSET_ATTR_IP2,
+       IPSET_ATTR_CIDR2,
+       __IPSET_ATTR_ADT_MAX,
 };
-
-#define IP_SET_OP_GET_BYINDEX  0x00000007      /* Get set name by index */
-/* Uses ip_set_req_get_set */
-
-#define IP_SET_OP_VERSION      0x00000100      /* Ask kernel version */
-struct ip_set_req_version {
-       unsigned op;
-       unsigned version;
+#define IPSET_ATTR_ADT_MAX     (__IPSET_ATTR_ADT_MAX - 1)
+
+/* Error codes */
+enum ipset_errno {
+       IPSET_ERR_PRIVATE = 128,
+       IPSET_ERR_PROTOCOL,
+       IPSET_ERR_FIND_TYPE,
+       IPSET_ERR_MAX_SETS,
+       IPSET_ERR_BUSY,
+       IPSET_ERR_EXIST_SETNAME2,
+       IPSET_ERR_TYPE_MISMATCH,
+       IPSET_ERR_EXIST,
+       IPSET_ERR_INVALID_CIDR,
+       IPSET_ERR_INVALID_NETMASK,
+       IPSET_ERR_INVALID_FAMILY,
+       IPSET_ERR_TIMEOUT,
+
+       IPSET_ERR_TYPE_SPECIFIC = 160,
 };
-
-/* Double shots operations: 
- * add, del, test, bind and unbind.
- *
- * First we query the kernel to get the index and type of the target set,
- * then issue the command. Validity of IP is checked in kernel in order
- * to minimalize sockopt operations.
- */
-
-/* Get minimal set data for add/del/test/bind/unbind IP */
-#define IP_SET_OP_ADT_GET      0x00000010      /* Get set and type */
-struct ip_set_req_adt_get {
-       unsigned op;
-       unsigned version;
-       union ip_set_name_index set;
-       char typename[IP_SET_MAXNAMELEN];
+                                       
+enum ipset_data_flags {
+       IPSET_FLAG_BIT_EXIST    = 0,
+       IPSET_FLAG_EXIST        = (1 << IPSET_FLAG_BIT_EXIST),
+       
+       IPSET_FLAG_BIT_BEFORE   = 2,
+       IPSET_FLAG_BEFORE       = (1 << IPSET_FLAG_BIT_BEFORE),
 };
 
-#define IP_SET_REQ_BYINDEX     \
-       unsigned op;            \
-       ip_set_id_t index;
-
-struct ip_set_req_adt {
-       IP_SET_REQ_BYINDEX;
-};
-
-#define IP_SET_OP_ADD_IP       0x00000101      /* Add an IP to a set */
-/* Uses ip_set_req_adt, with type specific addage */
-
-#define IP_SET_OP_DEL_IP       0x00000102      /* Remove an IP from a set */
-/* Uses ip_set_req_adt, with type specific addage */
-
-#define IP_SET_OP_TEST_IP      0x00000103      /* Test an IP in a set */
-/* Uses ip_set_req_adt, with type specific addage */
-
-#define IP_SET_OP_BIND_SET     0x00000104      /* Bind an IP to a set */
-/* Uses ip_set_req_bind, with type specific addage */
-struct ip_set_req_bind {
-       IP_SET_REQ_BYINDEX;
-       char binding[IP_SET_MAXNAMELEN];
-};
-
-#define IP_SET_OP_UNBIND_SET   0x00000105      /* Unbind an IP from a set */
-/* Uses ip_set_req_bind, with type speficic addage 
- * index = 0 means unbinding for all sets */
-
-#define IP_SET_OP_TEST_BIND_SET        0x00000106      /* Test binding an IP to a set */
-/* Uses ip_set_req_bind, with type specific addage */
-
-/* Multiple shots operations: list, save, restore.
- *
- * - check kernel version and query the max number of sets
- * - get the basic information on all sets
- *   and size required for the next step
- * - get actual set data: header, data, bindings
- */
-
-/* Get max_sets and the index of a queried set
- */
-#define IP_SET_OP_MAX_SETS     0x00000020
-struct ip_set_req_max_sets {
-       unsigned op;
-       unsigned version;
-       ip_set_id_t max_sets;           /* max_sets */
-       ip_set_id_t sets;               /* real number of sets */
-       union ip_set_name_index set;    /* index of set if name used */
+/* Commands with settype-specific attributes */
+enum ipset_adt {
+       IPSET_ADD,
+       IPSET_DEL,
+       IPSET_TEST,
+       IPSET_CREATE,
+       IPSET_CADT_MAX,
 };
 
-/* Get the id and name of the sets plus size for next step */
-#define IP_SET_OP_LIST_SIZE    0x00000201
-#define IP_SET_OP_SAVE_SIZE    0x00000202
-struct ip_set_req_setnames {
-       unsigned op;
-       ip_set_id_t index;              /* set to list/save */
-       u_int32_t size;                 /* size to get setdata */
-       /* followed by sets number of struct ip_set_name_list */
-};
-
-struct ip_set_name_list {
-       char name[IP_SET_MAXNAMELEN];
-       char typename[IP_SET_MAXNAMELEN];
-       ip_set_id_t index;
-       ip_set_id_t id;
-};
-
-/* The actual list operation */
-#define IP_SET_OP_LIST         0x00000203
-struct ip_set_req_list {
-       IP_SET_REQ_BYINDEX;
-       /* sets number of struct ip_set_list in reply */ 
-};
+#ifndef __KERNEL__
+#ifdef IP_SET_DEBUG
+#include <stdio.h>
+#include <sys/socket.h>
+#include <linux/netlink.h>
+#define D(format, args...)     do {                            \
+       fprintf(stderr, "%s: %s: ", __FILE__, __FUNCTION__);    \
+       fprintf(stderr, format "\n" , ## args);                 \
+} while (0)
+static inline void
+dump_nla(struct  nlattr *nla[], int maxlen)
+{
+       int i;
+       
+       for (i = 0; i < maxlen; i++)
+               D("nla[%u] does%s exist", i, !nla[i] ? " NOT" : "");
+}
 
-struct ip_set_list {
-       ip_set_id_t index;
-       ip_set_id_t binding;
-       u_int32_t ref;
-       u_int32_t header_size;  /* Set header data of header_size */
-       u_int32_t members_size; /* Set members data of members_size */
-       u_int32_t bindings_size;/* Set bindings data of bindings_size */
-};
+#else
+#define D(format, args...)
+#define dump_nla(nla, maxlen)
+#endif
+#endif /* !__KERNEL__ */
 
-struct ip_set_hash_list {
-       ip_set_ip_t ip;
-       ip_set_id_t binding;
-};
+#ifdef __KERNEL__
+#include <linux/ipv6.h>
+#include <linux/netlink.h>
+#include <net/netlink.h>
 
-/* The save operation */
-#define IP_SET_OP_SAVE         0x00000204
-/* Uses ip_set_req_list, in the reply replaced by
- * sets number of struct ip_set_save plus a marker
- * ip_set_save followed by ip_set_hash_save structures.
+/* Sets are identified by an index in kernel space. Tweak with ip_set_id_t
+ * and IPSET_INVALID_ID if you want to increase the max number of sets.
  */
-struct ip_set_save {
-       ip_set_id_t index;
-       ip_set_id_t binding;
-       u_int32_t header_size;  /* Set header data of header_size */
-       u_int32_t members_size; /* Set members data of members_size */
-};
+typedef uint16_t ip_set_id_t;
 
-/* At restoring, ip == 0 means default binding for the given set: */
-struct ip_set_hash_save {
-       ip_set_ip_t ip;
-       ip_set_id_t id;
-       ip_set_id_t binding;
+#define IPSET_INVALID_ID               65535
+
+/* Option flags for kernel operations */
+enum ip_set_kopt {
+       /* Bit 0 is reserved */
+       IPSET_SRC_FLAG = 1,
+       IPSET_SRC = (1 << IPSET_SRC_FLAG),
+       IPSET_DST_FLAG = 2,
+       IPSET_DST = (1 << IPSET_DST_FLAG),
+       IPSET_INV_FLAG = 3,
+       IPSET_INV = (1 << IPSET_INV_FLAG),
 };
 
-/* The restore operation */
-#define IP_SET_OP_RESTORE      0x00000205
-/* Uses ip_set_req_setnames followed by ip_set_restore structures
- * plus a marker ip_set_restore, followed by ip_set_hash_save 
- * structures.
- */
-struct ip_set_restore {
-       char name[IP_SET_MAXNAMELEN];
-       char typename[IP_SET_MAXNAMELEN];
-       ip_set_id_t index;
-       u_int32_t header_size;  /* Create data of header_size */
-       u_int32_t members_size; /* Set members data of members_size */
+/* Set features */
+enum ip_set_feature {
+       IPSET_TYPE_IP_FLAG = 0,
+       IPSET_TYPE_IP = (1 << IPSET_TYPE_IP_FLAG),
+       IPSET_TYPE_PORT_FLAG = 1,
+       IPSET_TYPE_PORT = (1 << IPSET_TYPE_PORT_FLAG),
+       IPSET_TYPE_MAC_FLAG = 2,
+       IPSET_TYPE_MAC = (1 << IPSET_TYPE_MAC_FLAG),
+       IPSET_TYPE_IP2_FLAG = 3,
+       IPSET_TYPE_IP2 = (1 << IPSET_TYPE_IP2_FLAG),
+       IPSET_TYPE_NAME_FLAG = 4,
+       IPSET_TYPE_NAME = (1 << IPSET_TYPE_NAME_FLAG),
 };
 
-static inline int bitmap_bytes(ip_set_ip_t a, ip_set_ip_t b)
+static inline int
+bitmap_bytes(uint32_t a, uint32_t b)
 {
        return 4 * ((((b - a + 8) / 8) + 3) / 4);
 }
 
-/* General limit for the elements in a set */
-#define MAX_RANGE 0x0000FFFF
-
-/* Alignment: 'unsigned long' unsupported */
-#define IPSET_ALIGNTO          4
-#define        IPSET_ALIGN(len) (((len) + IPSET_ALIGNTO - 1) & ~(IPSET_ALIGNTO - 1))
-#define IPSET_VALIGN(len, old) ((old) ? (len) : IPSET_ALIGN(len))
-
-#ifdef __KERNEL__
-#include <linux/netfilter_ipv4/ip_set_compat.h>
-#include <linux/netfilter_ipv4/ip_set_malloc.h>
-
-#define ip_set_printk(format, args...)                         \
+#define ip_set_printk(format, args...)                                 \
        do {                                                    \
                printk("%s: %s: ", __FILE__, __FUNCTION__);     \
                printk(format "\n" , ## args);                  \
        } while (0)
 
 #if defined(IP_SET_DEBUG)
-#define DP(format, args...)                                    \
+#define D(format, args...)                                     \
        do {                                                    \
                printk("%s: %s (DBG): ", __FILE__, __FUNCTION__);\
                printk(format "\n" , ## args);                  \
        } while (0)
-#define IP_SET_ASSERT(x)                                       \
-       do {                                                    \
-               if (!(x))                                       \
-                       printk("IP_SET_ASSERT: %s:%i(%s)\n",    \
-                               __FILE__, __LINE__, __FUNCTION__); \
-       } while (0)
+
+static inline void
+dump_nla(const struct nlattr * const nla[], int maxlen)
+{
+       int i;
+       
+       for (i = 0; i < maxlen; i++)
+               printk("nlattr[%u] does%s exist\n", i, nla[i] ? "" : " NOT");
+}
 #else
-#define DP(format, args...)
-#define IP_SET_ASSERT(x)
+#define D(format, args...)
+#define dump_nla(nla, maxlen)
 #endif
 
 struct ip_set;
 
-/*
- * The ip_set_type definition - one per set type, e.g. "ipmap".
- *
- * Each individual set has a pointer, set->type, going to one
- * of these structures. Function pointers inside the structure implement
- * the real behaviour of the sets.
- *
- * If not mentioned differently, the implementation behind the function
- * pointers of a set_type, is expected to return 0 if ok, and a negative
- * errno (e.g. -EINVAL) on error.
- */
+/* Set type, variant-specific part */
+struct ip_set_type_variant {
+       /* Kernelspace: test/add/del entries */
+       int (*kadt)(struct ip_set *set, const struct sk_buff * skb,
+                   enum ipset_adt adt, uint8_t pf, const uint8_t *flags);
+
+       /* Userspace: test/add/del entries */
+       int (*uadt)(struct ip_set *set, struct nlattr *head, int len,
+                   enum ipset_adt adt, uint32_t *lineno, uint32_t flags);
+
+       /* When adding entries and set is full, try to resize the set */
+       int (*resize)(struct ip_set *set, uint8_t retried);
+       /* Destroy the set */
+       void (*destroy)(struct ip_set *set);
+       /* Flush the elements */
+       void (*flush)(struct ip_set *set);
+
+       /* List set header data */
+       int (*head)(struct ip_set *set, struct sk_buff *skb);
+       /* List elements */
+       int (*list)(struct ip_set *set, struct sk_buff *skb,
+                   struct netlink_callback *cb);
+};
+
+/* Flags for the set type variants */
+enum ip_set_type_flags {
+       IP_SET_FLAG_VMALLOC_BIT = 0,
+       IP_SET_FLAG_VMALLOC = (1 << IP_SET_FLAG_VMALLOC_BIT),
+       IP_SET_FLAG_TIMEOUT_BIT = 1,
+       IP_SET_FLAG_TIMEOUT = (1 << IP_SET_FLAG_TIMEOUT_BIT),
+};
+
+/* The core set type structure */
 struct ip_set_type {
-       struct list_head list;  /* next in list of set types */
-
-       /* test for IP in set (kernel: iptables -m set src|dst)
-        * return 0 if not in set, 1 if in set.
-        */
-       int (*testip_kernel) (struct ip_set *set,
-                             const struct sk_buff * skb, 
-                             const u_int32_t *flags);
-
-       /* test for IP in set (userspace: ipset -T set IP)
-        * return 0 if not in set, 1 if in set.
-        */
-       int (*testip) (struct ip_set *set,
-                      const void *data, u_int32_t size);
-
-       /*
-        * Size of the data structure passed by when
-        * adding/deletin/testing an entry.
-        */
-       u_int32_t reqsize;
-
-       /* Add IP into set (userspace: ipset -A set IP)
-        * Return -EEXIST if the address is already in the set,
-        * and -ERANGE if the address lies outside the set bounds.
-        * If the address was not already in the set, 0 is returned.
-        */
-       int (*addip) (struct ip_set *set, 
-                     const void *data, u_int32_t size);
-
-       /* Add IP into set (kernel: iptables ... -j SET set src|dst)
-        * Return -EEXIST if the address is already in the set,
-        * and -ERANGE if the address lies outside the set bounds.
-        * If the address was not already in the set, 0 is returned.
-        */
-       int (*addip_kernel) (struct ip_set *set,
-                            const struct sk_buff * skb,
-                            const u_int32_t *flags);
-
-       /* remove IP from set (userspace: ipset -D set --entry x)
-        * Return -EEXIST if the address is NOT in the set,
-        * and -ERANGE if the address lies outside the set bounds.
-        * If the address really was in the set, 0 is returned.
-        */
-       int (*delip) (struct ip_set *set, 
-                     const void *data, u_int32_t size);
-
-       /* remove IP from set (kernel: iptables ... -j SET --entry x)
-        * Return -EEXIST if the address is NOT in the set,
-        * and -ERANGE if the address lies outside the set bounds.
-        * If the address really was in the set, 0 is returned.
-        */
-       int (*delip_kernel) (struct ip_set *set,
-                            const struct sk_buff * skb,
-                            const u_int32_t *flags);
-
-       /* new set creation - allocated type specific items
-        */
-       int (*create) (struct ip_set *set,
-                      const void *data, u_int32_t size);
-
-       /* retry the operation after successfully tweaking the set
-        */
-       int (*retry) (struct ip_set *set);
-
-       /* set destruction - free type specific items
-        * There is no return value.
-        * Can be called only when child sets are destroyed.
-        */
-       void (*destroy) (struct ip_set *set);
-
-       /* set flushing - reset all bits in the set, or something similar.
-        * There is no return value.
-        */
-       void (*flush) (struct ip_set *set);
-
-       /* Listing: size needed for header
-        */
-       u_int32_t header_size;
-
-       /* Listing: Get the header
-        *
-        * Fill in the information in "data".
-        * This function is always run after list_header_size() under a 
-        * writelock on the set. Therefor is the length of "data" always 
-        * correct. 
-        */
-       void (*list_header) (const struct ip_set *set, 
-                            void *data);
-
-       /* Listing: Get the size for the set members
-        */
-       int (*list_members_size) (const struct ip_set *set, char dont_align);
-
-       /* Listing: Get the set members
-        *
-        * Fill in the information in "data".
-        * This function is always run after list_member_size() under a 
-        * writelock on the set. Therefor is the length of "data" always 
-        * correct. 
-        */
-       void (*list_members) (const struct ip_set *set,
-                             void *data, char dont_align);
-
-       char typename[IP_SET_MAXNAMELEN];
-       unsigned char features;
-       int protocol_version;
+       struct list_head list;
+
+       /* Typename */
+       char name[IPSET_MAXNAMELEN];
+       /* Protocol version */
+       uint8_t protocol;
+       /* Set features to control swapping */
+       uint8_t features;
+       /* Supported family: may be AF_UNSPEC for both AF_INET/AF_INET6 */
+       uint8_t family;
+       /* Type revision */
+       uint8_t revision;
+
+       /* Create set */
+       int (*create)(struct ip_set *set,
+                     struct nlattr *head, int len, uint32_t flags);
 
        /* Set this to THIS_MODULE if you are a module, otherwise NULL */
        struct module *me;
 };
 
-extern int ip_set_register_set_type(struct ip_set_type *set_type);
-extern void ip_set_unregister_set_type(struct ip_set_type *set_type);
+extern int ip_set_type_register(struct ip_set_type *set_type);
+extern void ip_set_type_unregister(struct ip_set_type *set_type);
 
-/* A generic ipset */
+/* A generic IP set: one named set instance bound to a set type */
 struct ip_set {
-       char name[IP_SET_MAXNAMELEN];   /* the name of the set */
-       rwlock_t lock;                  /* lock for concurrency control */
-       ip_set_id_t id;                 /* set id for swapping */
-       atomic_t ref;                   /* in kernel and in hash references */
-       struct ip_set_type *type;       /* the set types */
-       void *data;                     /* pooltype specific data */
+       /* The name of the set */
+       char name[IPSET_MAXNAMELEN];
+       /* Lock protecting the set data */
+       rwlock_t lock;
+       /* References to the set */
+       atomic_t ref;
+       /* The core set type */
+       const struct ip_set_type *type;
+       /* The type variant doing the real job */
+       const struct ip_set_type_variant *variant;
+       /* The actual INET family of this set (the type may support both) */
+       uint8_t family;
+       /* Set type flags, filled/modified by create/resize */
+       uint8_t flags;
+       /* The type specific data (opaque pointer) */
+       void *data;
 };
 
 /* register and unregister set references */
-extern ip_set_id_t ip_set_get_byname(const char name[IP_SET_MAXNAMELEN]);
-extern ip_set_id_t ip_set_get_byindex(ip_set_id_t index);
+extern ip_set_id_t ip_set_get_byname(const char name[IPSET_MAXNAMELEN]);
 extern void ip_set_put_byindex(ip_set_id_t index);
-extern ip_set_id_t ip_set_id(ip_set_id_t index);
-extern ip_set_id_t __ip_set_get_byname(const char name[IP_SET_MAXNAMELEN],
-                                      struct ip_set **set);
-extern void __ip_set_put_byindex(ip_set_id_t index);
 
 /* API for iptables set match, and SET target */
-extern int ip_set_addip_kernel(ip_set_id_t id,
-                              const struct sk_buff *skb,
-                              const u_int32_t *flags);
-extern int ip_set_delip_kernel(ip_set_id_t id,
-                              const struct sk_buff *skb,
-                              const u_int32_t *flags);
-extern int ip_set_testip_kernel(ip_set_id_t id,
-                               const struct sk_buff *skb,
-                               const u_int32_t *flags);
-
-/* Macros to generate functions */
-
-#define STRUCT(pre, type)      CONCAT2(pre, type)
-#define CONCAT2(pre, type)     struct pre##type
-
-#define FNAME(pre, mid, post)  CONCAT3(pre, mid, post)
-#define CONCAT3(pre, mid, post)        pre##mid##post
-
-#define UADT0(type, adt, args...)                                      \
-static int                                                             \
-FNAME(type,_u,adt)(struct ip_set *set, const void *data, u_int32_t size)\
-{                                                                      \
-       const STRUCT(ip_set_req_,type) *req = data;                     \
-                                                                       \
-       return FNAME(type,_,adt)(set , ## args);                        \
+extern int ip_set_add(ip_set_id_t id, const struct sk_buff *skb,
+                     uint8_t family, const uint8_t *flags);
+extern int ip_set_del(ip_set_id_t id, const struct sk_buff *skb,
+                     uint8_t family, const uint8_t *flags);
+extern int ip_set_test(ip_set_id_t id, const struct sk_buff *skb,
+                      uint8_t family, const uint8_t *flags);
+
+/* Allocate members
+ *
+ * Returns a zero-filled area of "size" bytes or NULL.  kzalloc() is tried
+ * first; if it fails, the request is retried with __vmalloc().  Which
+ * allocator succeeded is recorded in *flags by clearing or setting
+ * IP_SET_FLAG_VMALLOC, so the same flags value must later be passed to
+ * ip_set_free().  *flags is left untouched when both attempts fail.
+ */
+static inline void *
+ip_set_alloc(size_t size, gfp_t gfp_mask, uint8_t *flags)
+{
+       /* A failure of this first attempt is expected for large sizes and
+        * is handled by the vmalloc fallback below, so do not let the page
+        * allocator log an allocation failure for it.
+        */
+       void *members = kzalloc(size, gfp_mask | __GFP_NOWARN);
+
+       if (members) {
+               *flags &= ~IP_SET_FLAG_VMALLOC;
+               D("allocated with kmalloc %p", members);
+               return members;
+       }
+
+       members = __vmalloc(size, gfp_mask | __GFP_ZERO, PAGE_KERNEL);
+       if (!members)
+               return NULL;
+       *flags |= IP_SET_FLAG_VMALLOC;
+       D("allocated with vmalloc %p", members);
+
+       return members;
+}
+
+/* Free members
+ *
+ * Counterpart of ip_set_alloc(): IP_SET_FLAG_VMALLOC in "flags" selects
+ * vfree(), otherwise kfree() is used.
+ */
+static inline void
+ip_set_free(void *members, uint8_t flags)
+{
+       D("free with %s %p", flags & IP_SET_FLAG_VMALLOC ? "vmalloc" : "kmalloc",
+         members);
+       if (flags & IP_SET_FLAG_VMALLOC)
+               vfree(members);
+       else
+               kfree(members);
 }
 
-#define UADT(type, adt, args...)                                       \
-       UADT0(type, adt, req->ip , ## args)
-
-#define KADT(type, adt, getfn, args...)                                        \
-static int                                                             \
-FNAME(type,_k,adt)(struct ip_set *set,                                 \
-            const struct sk_buff *skb,                                 \
-            const u_int32_t *flags)                                    \
-{                                                                      \
-       ip_set_ip_t ip = getfn(skb, flags);                             \
-                                                                       \
-       KADT_CONDITION                                                  \
-       return FNAME(type,_,adt)(set, ip , ##args);                     \
+/* Useful converters
+ *
+ * The integer payload of an attribute may be in either byte order;
+ * NLA_F_NET_BYTEORDER in nla_type marks network order.  The
+ * ip_set_get_h*() helpers return the value in host byte order, the
+ * ip_set_get_n*() helpers in network byte order, converting only when
+ * the payload is not already in the requested order.
+ */
+static inline uint32_t
+ip_set_get_h32(const struct nlattr *attr)
+{
+       uint32_t value = nla_get_u32(attr);
+       
+       return attr->nla_type & NLA_F_NET_BYTEORDER ? ntohl(value) : value;
 }
 
-#define REGISTER_MODULE(type)                                          \
-static int __init ip_set_##type##_init(void)                           \
-{                                                                      \
-       init_max_page_size();                                           \
-       return ip_set_register_set_type(&ip_set_##type);                \
-}                                                                      \
-                                                                       \
-static void __exit ip_set_##type##_fini(void)                          \
-{                                                                      \
-       /* FIXME: possible race with ip_set_create() */                 \
-       ip_set_unregister_set_type(&ip_set_##type);                     \
-}                                                                      \
-                                                                       \
-module_init(ip_set_##type##_init);                                     \
-module_exit(ip_set_##type##_fini);
-
-/* Common functions */
-
-static inline ip_set_ip_t
-ipaddr(const struct sk_buff *skb, const u_int32_t *flags)
+static inline uint16_t
+ip_set_get_h16(const struct nlattr *attr)     /* result: host byte order */
 {
-       return ntohl(flags[0] & IPSET_SRC ? ip_hdr(skb)->saddr : ip_hdr(skb)->daddr);
+       uint16_t value = nla_get_u16(attr);     /* order flagged in nla_type */
+       
+       return attr->nla_type & NLA_F_NET_BYTEORDER ? ntohs(value) : value;
 }
 
-#define jhash_ip(map, i, ip)   jhash_1word(ip, *(map->initval + i))
+static inline uint32_t
+ip_set_get_n32(const struct nlattr *attr)     /* result: network byte order */
+{
+       uint32_t value = nla_get_u32(attr);     /* order flagged in nla_type */
+       
+       return attr->nla_type & NLA_F_NET_BYTEORDER ? value : htonl(value);
+}
+
+static inline uint16_t
+ip_set_get_n16(const struct nlattr *attr)     /* result: network byte order */
+{
+       uint16_t value = nla_get_u16(attr);     /* order flagged in nla_type */
+       
+       return attr->nla_type & NLA_F_NET_BYTEORDER ? value : htons(value);
+}
+
+/* Nested attributes are always flagged with NLA_F_NESTED.  The attr
+ * argument is parenthesized so that any expression can be passed safely.
+ */
+#define ipset_nest_start(skb, attr) nla_nest_start(skb, (attr) | NLA_F_NESTED)
+#define ipset_nest_end(skb, start)  nla_nest_end(skb, start)
+
+/* Put a 32/16 bit big endian value and flag the attribute with
+ * NLA_F_NET_BYTEORDER.  The type argument is parenthesized so that any
+ * expression can be passed safely.
+ */
+#define NLA_PUT_NET32(skb, type, value)        \
+       NLA_PUT_BE32(skb, (type) | NLA_F_NET_BYTEORDER, value)
+
+#define NLA_PUT_NET16(skb, type, value)        \
+       NLA_PUT_BE16(skb, (type) | NLA_F_NET_BYTEORDER, value)
+
+/* Get address from skbuff
+ *
+ * All three helpers pick the source address when IPSET_SRC is set in
+ * flags[0] and the destination address otherwise.  The address is taken
+ * from the IP header as is, no byte-order conversion is applied.
+ *
+ * ip4addr()    - returns the IPv4 address
+ * ip4addrptr() - stores the IPv4 address in *addr
+ * ip6addrptr() - copies the IPv6 address into *addr
+ */
+static inline uint32_t
+ip4addr(const struct sk_buff *skb, const uint8_t *flags)
+{
+       return flags[0] & IPSET_SRC ? ip_hdr(skb)->saddr
+                                   : ip_hdr(skb)->daddr;
+}
+
+static inline void
+ip4addrptr(const struct sk_buff *skb, const uint8_t *flags, uint32_t *addr)
+{
+       *addr = flags[0] & IPSET_SRC ? ip_hdr(skb)->saddr
+                                    : ip_hdr(skb)->daddr;
+}
+
+static inline void
+ip6addrptr(const struct sk_buff *skb, const uint8_t *flags,
+          struct in6_addr *addr)
+{
+       memcpy(addr, flags[0] & IPSET_SRC ? &ipv6_hdr(skb)->saddr
+                                         : &ipv6_hdr(skb)->daddr,
+              sizeof(*addr));
+}
 
 #define pack_ip_port(map, ip, port) \
        (port + ((ip - ((map)->first_ip)) << 16))
 
-#endif                         /* __KERNEL__ */
-
-#define UNUSED __attribute__ ((unused))
+#endif /* __KERNEL__ */
 
-#endif /*_IP_SET_H*/
+#endif /*_IP_SET_H */
index da3493f222b523743b512e53e7548fb1e5ef3153..49d0f5c43e91790becca8ab48c3727a66ca5ad9a 100644 (file)
-#ifndef __IP_SET_BITMAPS_H
-#define __IP_SET_BITMAPS_H
+#ifndef __IP_SET_BITMAP_H
+#define __IP_SET_BITMAP_H
 
-/* Macros to generate functions */
+/* Bitmap type specific error codes, numbered upwards from
+ * IPSET_ERR_TYPE_SPECIFIC.
+ * NOTE(review): judging by the names these report an invalid range and a
+ * range that is too large - confirm against the bitmap set types.
+ */
+enum {
+       IPSET_ERR_BITMAP_RANGE = IPSET_ERR_TYPE_SPECIFIC,
+       IPSET_ERR_BITMAP_RANGE_SIZE,
+};
 
 #ifdef __KERNEL__
-#define BITMAP_CREATE(type)                                            \
-static int                                                             \
-type##_create(struct ip_set *set, const void *data, u_int32_t size)    \
-{                                                                      \
-       int newbytes;                                                   \
-       const struct ip_set_req_##type##_create *req = data;            \
-       struct ip_set_##type *map;                                      \
-                                                                       \
-       if (req->from > req->to) {                                      \
-               DP("bad range");                                        \
-               return -ENOEXEC;                                        \
-       }                                                               \
-                                                                       \
-       map = kmalloc(sizeof(struct ip_set_##type), GFP_KERNEL);        \
-       if (!map) {                                                     \
-               DP("out of memory for %zu bytes",                       \
-                  sizeof(struct ip_set_##type));                       \
-               return -ENOMEM;                                         \
-       }                                                               \
-       map->first_ip = req->from;                                      \
-       map->last_ip = req->to;                                         \
-                                                                       \
-       newbytes = __##type##_create(req, map);                         \
-       if (newbytes < 0) {                                             \
-               kfree(map);                                             \
-               return newbytes;                                        \
-       }                                                               \
-                                                                       \
-       map->size = newbytes;                                           \
-       map->members = ip_set_malloc(newbytes);                         \
-       if (!map->members) {                                            \
-               DP("out of memory for %i bytes", newbytes);             \
-               kfree(map);                                             \
-               return -ENOMEM;                                         \
-       }                                                               \
-       memset(map->members, 0, newbytes);                              \
-                                                                       \
-       set->data = map;                                                \
-       return 0;                                                       \
-}
-
-#define BITMAP_DESTROY(type)                                           \
-static void                                                            \
-type##_destroy(struct ip_set *set)                                     \
-{                                                                      \
-       struct ip_set_##type *map = set->data;                          \
-                                                                       \
-       ip_set_free(map->members, map->size);                           \
-       kfree(map);                                                     \
-                                                                       \
-       set->data = NULL;                                               \
+#define IPSET_BITMAP_MAX_RANGE 0x0000FFFF
+
+/* Common functions
+ *
+ * range_to_mask() looks for the netmask describing the range from..to:
+ * starting at /31 and shortening the prefix by one bit per step, it
+ * returns the first (i.e. longest) mask for which (to & mask) == from
+ * and stores the matching prefix length in *bits.  If no mask of length
+ * 31..1 satisfies the condition, 0 is returned and *bits is set to 0.
+ * Only "from" is compared with the masked "to"; that "to" is the last
+ * address of the block is not verified here.
+ */
+
+static inline uint32_t
+range_to_mask(uint32_t from, uint32_t to, uint8_t *bits)
+{
+       uint32_t mask = 0xFFFFFFFE;
+       
+       *bits = 32;
+       while (--(*bits) > 0 && mask && (to & mask) != from)
+               mask <<= 1;
+               
+       return mask;
 }
 
-#define BITMAP_FLUSH(type)                                             \
-static void                                                            \
-type##_flush(struct ip_set *set)                                       \
-{                                                                      \
-       struct ip_set_##type *map = set->data;                          \
-       memset(map->members, 0, map->size);                             \
-}
-
-#define BITMAP_LIST_HEADER(type)                                       \
-static void                                                            \
-type##_list_header(const struct ip_set *set, void *data)               \
-{                                                                      \
-       const struct ip_set_##type *map = set->data;                    \
-       struct ip_set_req_##type##_create *header = data;               \
-                                                                       \
-       header->from = map->first_ip;                                   \
-       header->to = map->last_ip;                                      \
-       __##type##_list_header(map, header);                            \
-}
-
-#define BITMAP_LIST_MEMBERS_SIZE(type, dtype, sizeid, testfn)          \
-static int                                                             \
-type##_list_members_size(const struct ip_set *set, char dont_align)    \
-{                                                                      \
-       const struct ip_set_##type *map = set->data;                    \
-       ip_set_ip_t i, elements = 0;                                    \
-                                                                       \
-       if (dont_align)                                                 \
-               return map->size;                                       \
-                                                                       \
-       for (i = 0; i < sizeid; i++)                                    \
-               if (testfn)                                             \
-                       elements++;                                     \
-                                                                       \
-       return elements * IPSET_ALIGN(sizeof(dtype));                   \
-}
-
-#define IP_SET_TYPE(type, __features)                                  \
-struct ip_set_type ip_set_##type = {                                   \
-       .typename               = #type,                                \
-       .features               = __features,                           \
-       .protocol_version       = IP_SET_PROTOCOL_VERSION,              \
-       .create                 = &type##_create,                       \
-       .destroy                = &type##_destroy,                      \
-       .flush                  = &type##_flush,                        \
-       .reqsize                = sizeof(struct ip_set_req_##type),     \
-       .addip                  = &type##_uadd,                         \
-       .addip_kernel           = &type##_kadd,                         \
-       .delip                  = &type##_udel,                         \
-       .delip_kernel           = &type##_kdel,                         \
-       .testip                 = &type##_utest,                        \
-       .testip_kernel          = &type##_ktest,                        \
-       .header_size            = sizeof(struct ip_set_req_##type##_create),\
-       .list_header            = &type##_list_header,                  \
-       .list_members_size      = &type##_list_members_size,            \
-       .list_members           = &type##_list_members,                 \
-       .me                     = THIS_MODULE,                          \
-};
-#endif /* __KERNEL */
-
-#endif /* __IP_SET_BITMAPS_H */
+#endif /* __KERNEL__ */
+       
+#endif /* __IP_SET_BITMAP_H */
index 18ed729422a9747b7c5b4ad900959060453ac147..855f12a281549db80366ea5aba444c514a95f7df 100644 (file)
@@ -2,45 +2,73 @@
 #define _IP_SET_GETPORT_H
 
 #ifdef __KERNEL__
+#include <linux/netfilter_ipv6/ip6_tables.h>
+#include <net/ip.h>
 
-#define INVALID_PORT   (MAX_RANGE + 1)
+#define IPSET_INVALID_PORT     65536
 
 /* We must handle non-linear skbs */
-static inline ip_set_ip_t
-get_port(const struct sk_buff *skb, const u_int32_t *flags)
+static uint32_t
+get_port(uint8_t pf, const struct sk_buff *skb, const uint8_t *flags)
 {
-       struct iphdr *iph = ip_hdr(skb);
-       u_int16_t offset = ntohs(iph->frag_off) & IP_OFFSET;
-       switch (iph->protocol) {
-       case IPPROTO_TCP: {
-               struct tcphdr tcph;
+       unsigned short protocol;
+       unsigned int protoff;
+       int fragoff;
+       /* Result: the TCP/UDP source port when IPSET_SRC is set in flags[0],
+        * the destination port otherwise, as stored in the header (ntohs()
+        * is not applied); IPSET_INVALID_PORT on any failure.
+        * First find the transport protocol, the offset of its header and
+        * the fragment offset for the given family.
+        */
+       switch (pf) {
+       case AF_INET: {
+               const struct iphdr *iph = ip_hdr(skb);
+
+               protocol = iph->protocol;
+               fragoff = ntohs(iph->frag_off) & IP_OFFSET;
+               protoff = ip_hdrlen(skb);
+               break;
+       }
+       case AF_INET6: {
+               int protohdr;
+               unsigned short frag_off;
                
-               /* See comments at tcp_match in ip_tables.c */
-               if (offset)
-                       return INVALID_PORT;
+               protohdr = ipv6_find_hdr(skb, &protoff, -1, &frag_off);
+               if (protohdr < 0)
+                       return IPSET_INVALID_PORT;
 
-               if (skb_copy_bits(skb, ip_hdr(skb)->ihl*4, &tcph, sizeof(tcph)) < 0)
+               protocol = protohdr;
+               fragoff = frag_off;
+               break;
+       }
+       default:
+               return IPSET_INVALID_PORT;
+       }
+
+       /* A nonzero fragment offset gives IPSET_INVALID_PORT.
+        * See comments at tcp_match in ip_tables.c */
+       if (fragoff)
+               return IPSET_INVALID_PORT;
+
+       switch (protocol) {
+       case IPPROTO_TCP: {
+               struct tcphdr _tcph;
+               const struct tcphdr *th;
+               
+               th = skb_header_pointer(skb, protoff, sizeof(_tcph), &_tcph);
+               if (th == NULL)
                        /* No choice either */
-                       return INVALID_PORT;
+                       return IPSET_INVALID_PORT;
                
-               return ntohs(flags[0] & IPSET_SRC ?
-                            tcph.source : tcph.dest);
+               return flags[0] & IPSET_SRC ? th->source : th->dest;
            }
        case IPPROTO_UDP: {
-               struct udphdr udph;
-
-               if (offset)
-                       return INVALID_PORT;
+               struct udphdr _udph;
+               const struct udphdr *uh;
 
-               if (skb_copy_bits(skb, ip_hdr(skb)->ihl*4, &udph, sizeof(udph)) < 0)
+               uh = skb_header_pointer(skb, protoff, sizeof(_udph), &_udph);
+               if (uh == NULL)
                        /* No choice either */
-                       return INVALID_PORT;
+                       return IPSET_INVALID_PORT;
                
-               return ntohs(flags[0] & IPSET_SRC ?
-                            udph.source : udph.dest);
+               return flags[0] & IPSET_SRC ? uh->source : uh->dest;
            }
        default:
-               return INVALID_PORT;
+               return IPSET_INVALID_PORT;
        }
 }
 #endif                         /* __KERNEL__ */
index 8eeced31403df2b0b0909a66d2136115ec455a45..dd183b70bbd76a3d81891ef3a050f6f68cb740ef 100644 (file)
-#ifndef __IP_SET_HASHES_H
-#define __IP_SET_HASHES_H
+#ifndef __IP_SET_HASH_H
+#define __IP_SET_HASH_H
 
-#define initval_t uint32_t
-
-/* Macros to generate functions */
-
-#ifdef __KERNEL__
-#define HASH_RETRY0(type, dtype, cond)                                 \
-static int                                                             \
-type##_retry(struct ip_set *set)                                       \
-{                                                                      \
-       struct ip_set_##type *map = set->data, *tmp;                    \
-       dtype *elem;                                                    \
-       void *members;                                                  \
-       u_int32_t i, hashsize = map->hashsize;                          \
-       int res;                                                        \
-                                                                       \
-       if (map->resize == 0)                                           \
-               return -ERANGE;                                         \
-                                                                       \
-    again:                                                             \
-       res = 0;                                                        \
-                                                                       \
-       /* Calculate new hash size */                                   \
-       hashsize += (hashsize * map->resize)/100;                       \
-       if (hashsize == map->hashsize)                                  \
-               hashsize++;                                             \
-                                                                       \
-       ip_set_printk("rehashing of set %s triggered: "                 \
-                     "hashsize grows from %lu to %lu",                 \
-                     set->name,                                        \
-                     (long unsigned)map->hashsize,                     \
-                     (long unsigned)hashsize);                         \
-                                                                       \
-       tmp = kmalloc(sizeof(struct ip_set_##type)                      \
-                     + map->probes * sizeof(initval_t), GFP_ATOMIC);   \
-       if (!tmp) {                                                     \
-               DP("out of memory for %zu bytes",                       \
-                  sizeof(struct ip_set_##type)                         \
-                  + map->probes * sizeof(initval_t));                  \
-               return -ENOMEM;                                         \
-       }                                                               \
-       tmp->members = harray_malloc(hashsize, sizeof(dtype), GFP_ATOMIC);\
-       if (!tmp->members) {                                            \
-               DP("out of memory for %zu bytes", hashsize * sizeof(dtype));\
-               kfree(tmp);                                             \
-               return -ENOMEM;                                         \
-       }                                                               \
-       tmp->hashsize = hashsize;                                       \
-       tmp->elements = 0;                                              \
-       tmp->probes = map->probes;                                      \
-       tmp->resize = map->resize;                                      \
-       memcpy(tmp->initval, map->initval, map->probes * sizeof(initval_t));\
-       __##type##_retry(tmp, map);                                     \
-                                                                       \
-       write_lock_bh(&set->lock);                                      \
-       map = set->data; /* Play safe */                                \
-       for (i = 0; i < map->hashsize && res == 0; i++) {               \
-               elem = HARRAY_ELEM(map->members, dtype *, i);           \
-               if (cond)                                               \
-                       res = __##type##_add(tmp, elem);                \
-       }                                                               \
-       if (res) {                                                      \
-               /* Failure, try again */                                \
-               write_unlock_bh(&set->lock);                            \
-               harray_free(tmp->members);                              \
-               kfree(tmp);                                             \
-               goto again;                                             \
-       }                                                               \
-                                                                       \
-       /* Success at resizing! */                                      \
-       members = map->members;                                         \
-                                                                       \
-       map->hashsize = tmp->hashsize;                                  \
-       map->members = tmp->members;                                    \
-       write_unlock_bh(&set->lock);                                    \
-                                                                       \
-       harray_free(members);                                           \
-       kfree(tmp);                                                     \
-                                                                       \
-       return 0;                                                       \
-}
-
-#define HASH_RETRY(type, dtype)                                                \
-       HASH_RETRY0(type, dtype, *elem)
-
-#define HASH_RETRY2(type, dtype)                                               \
-       HASH_RETRY0(type, dtype, elem->ip || elem->ip1)
-
-#define HASH_CREATE(type, dtype)                                       \
-static int                                                             \
-type##_create(struct ip_set *set, const void *data, u_int32_t size)    \
-{                                                                      \
-       const struct ip_set_req_##type##_create *req = data;            \
-       struct ip_set_##type *map;                                      \
-       uint16_t i;                                                     \
-                                                                       \
-       if (req->hashsize < 1) {                                        \
-               ip_set_printk("hashsize too small");                    \
-               return -ENOEXEC;                                        \
-       }                                                               \
-                                                                       \
-       if (req->probes < 1) {                                          \
-               ip_set_printk("probes too small");                      \
-               return -ENOEXEC;                                        \
-       }                                                               \
-                                                                       \
-       map = kmalloc(sizeof(struct ip_set_##type)                      \
-                     + req->probes * sizeof(initval_t), GFP_KERNEL);   \
-       if (!map) {                                                     \
-               DP("out of memory for %zu bytes",                       \
-                  sizeof(struct ip_set_##type)                         \
-                  + req->probes * sizeof(initval_t));                  \
-               return -ENOMEM;                                         \
-       }                                                               \
-       for (i = 0; i < req->probes; i++)                               \
-               get_random_bytes(((initval_t *) map->initval)+i, 4);    \
-       map->elements = 0;                                              \
-       map->hashsize = req->hashsize;                                  \
-       map->probes = req->probes;                                      \
-       map->resize = req->resize;                                      \
-       if (__##type##_create(req, map)) {                              \
-               kfree(map);                                             \
-               return -ENOEXEC;                                        \
-       }                                                               \
-       map->members = harray_malloc(map->hashsize, sizeof(dtype), GFP_KERNEL);\
-       if (!map->members) {                                            \
-               DP("out of memory for %zu bytes", map->hashsize * sizeof(dtype));\
-               kfree(map);                                             \
-               return -ENOMEM;                                         \
-       }                                                               \
-                                                                       \
-       set->data = map;                                                \
-       return 0;                                                       \
-}
-
-#define HASH_DESTROY(type)                                             \
-static void                                                            \
-type##_destroy(struct ip_set *set)                                     \
-{                                                                      \
-       struct ip_set_##type *map = set->data;                          \
-                                                                       \
-       harray_free(map->members);                                      \
-       kfree(map);                                                     \
-                                                                       \
-       set->data = NULL;                                               \
-}
-
-#define HASH_FLUSH(type, dtype)                                                \
-static void                                                            \
-type##_flush(struct ip_set *set)                                       \
-{                                                                      \
-       struct ip_set_##type *map = set->data;                          \
-       harray_flush(map->members, map->hashsize, sizeof(dtype));       \
-       map->elements = 0;                                              \
-}
-
-#define HASH_FLUSH_CIDR(type, dtype)                                   \
-static void                                                            \
-type##_flush(struct ip_set *set)                                       \
-{                                                                      \
-       struct ip_set_##type *map = set->data;                          \
-       harray_flush(map->members, map->hashsize, sizeof(dtype));       \
-       memset(map->cidr, 0, sizeof(map->cidr));                        \
-       memset(map->nets, 0, sizeof(map->nets));                        \
-       map->elements = 0;                                              \
-}
-
-#define HASH_LIST_HEADER(type)                                         \
-static void                                                            \
-type##_list_header(const struct ip_set *set, void *data)               \
-{                                                                      \
-       const struct ip_set_##type *map = set->data;                    \
-       struct ip_set_req_##type##_create *header = data;               \
-                                                                       \
-       header->hashsize = map->hashsize;                               \
-       header->probes = map->probes;                                   \
-       header->resize = map->resize;                                   \
-       __##type##_list_header(map, header);                            \
-}
-
-#define HASH_LIST_MEMBERS_SIZE(type, dtype)                            \
-static int                                                             \
-type##_list_members_size(const struct ip_set *set, char dont_align)    \
-{                                                                      \
-       const struct ip_set_##type *map = set->data;                    \
-                                                                       \
-       return (map->elements * IPSET_VALIGN(sizeof(dtype), dont_align));\
-}
-
-#define HASH_LIST_MEMBERS(type, dtype)                                 \
-static void                                                            \
-type##_list_members(const struct ip_set *set, void *data, char dont_align)\
-{                                                                      \
-       const struct ip_set_##type *map = set->data;                    \
-       dtype *elem, *d;                                                \
-       uint32_t i, n = 0;                                              \
-                                                                       \
-       for (i = 0; i < map->hashsize; i++) {                           \
-               elem = HARRAY_ELEM(map->members, dtype *, i);           \
-               if (*elem) {                                            \
-                       d = data + n * IPSET_VALIGN(sizeof(dtype), dont_align);\
-                       *d = *elem;                                     \
-                       n++;                                            \
-               }                                                       \
-       }                                                               \
-}
-
-#define HASH_LIST_MEMBERS_MEMCPY(type, dtype, nonzero)                 \
-static void                                                            \
-type##_list_members(const struct ip_set *set, void *data, char dont_align)\
-{                                                                      \
-       const struct ip_set_##type *map = set->data;                    \
-       dtype *elem;                                                    \
-       uint32_t i, n = 0;                                              \
-                                                                       \
-       for (i = 0; i < map->hashsize; i++) {                           \
-               elem = HARRAY_ELEM(map->members, dtype *, i);           \
-               if (nonzero) {                                          \
-                       memcpy(data + n * IPSET_VALIGN(sizeof(dtype), dont_align),\
-                              elem, sizeof(dtype));                    \
-                       n++;                                            \
-               }                                                       \
-       }                                                               \
-}
-
-#define IP_SET_RTYPE(type, __features)                                 \
-struct ip_set_type ip_set_##type = {                                   \
-       .typename               = #type,                                \
-       .features               = __features,                           \
-       .protocol_version       = IP_SET_PROTOCOL_VERSION,              \
-       .create                 = &type##_create,                       \
-       .retry                  = &type##_retry,                        \
-       .destroy                = &type##_destroy,                      \
-       .flush                  = &type##_flush,                        \
-       .reqsize                = sizeof(struct ip_set_req_##type),     \
-       .addip                  = &type##_uadd,                         \
-       .addip_kernel           = &type##_kadd,                         \
-       .delip                  = &type##_udel,                         \
-       .delip_kernel           = &type##_kdel,                         \
-       .testip                 = &type##_utest,                        \
-       .testip_kernel          = &type##_ktest,                        \
-       .header_size            = sizeof(struct ip_set_req_##type##_create),\
-       .list_header            = &type##_list_header,                  \
-       .list_members_size      = &type##_list_members_size,            \
-       .list_members           = &type##_list_members,                 \
-       .me                     = THIS_MODULE,                          \
+/* Hash type specific error codes */
+enum {
+       IPSET_ERR_HASH_FULL = IPSET_ERR_TYPE_SPECIFIC,
+       IPSET_ERR_HASH_ELEM,
 };
 
-/* Helper functions */
-static inline void
-add_cidr_size(uint8_t *cidr, uint8_t size)
-{
-       uint8_t next;
-       int i;
-       
-       for (i = 0; i < 30 && cidr[i]; i++) {
-               if (cidr[i] < size) {
-                       next = cidr[i];
-                       cidr[i] = size;
-                       size = next;
-               }
-       }
-       if (i < 30)
-               cidr[i] = size;
-}
-
-static inline void
-del_cidr_size(uint8_t *cidr, uint8_t size)
-{
-       int i;
-       
-       for (i = 0; i < 29 && cidr[i]; i++) {
-               if (cidr[i] == size)
-                       cidr[i] = size = cidr[i+1];
-       }
-       cidr[29] = 0;
-}
-#else
-#include <arpa/inet.h>
-#endif /* __KERNEL */
-
-#ifndef UINT16_MAX
-#define UINT16_MAX 65535
-#endif
-
-static unsigned char shifts[] = {255, 253, 249, 241, 225, 193, 129, 1};
-
-static inline ip_set_ip_t 
-pack_ip_cidr(ip_set_ip_t ip, unsigned char cidr)
-{
-       ip_set_ip_t addr, *paddr = &addr;
-       unsigned char n, t, *a;
-
-       addr = htonl(ip & (0xFFFFFFFF << (32 - (cidr))));
 #ifdef __KERNEL__
-       DP("ip:%u.%u.%u.%u/%u", NIPQUAD(addr), cidr);
-#endif
-       n = cidr / 8;
-       t = cidr % 8;   
-       a = &((unsigned char *)paddr)[n];
-       *a = *a /(1 << (8 - t)) + shifts[t];
-#ifdef __KERNEL__
-       DP("n: %u, t: %u, a: %u", n, t, *a);
-       DP("ip:%u.%u.%u.%u/%u, %u.%u.%u.%u",
-          HIPQUAD(ip), cidr, NIPQUAD(addr));
-#endif
 
-       return ntohl(addr);
-}
+#define initval_t uint32_t
 
+#define IPSET_DEFAULT_HASHSIZE         1024
+#define IPSET_DEFAULT_MAXELEM          65536
+#define IPSET_DEFAULT_PROBES           4
+#define IPSET_DEFAULT_RESIZE           50
 
-#endif /* __IP_SET_HASHES_H */
+#endif /* __KERNEL__ */
+       
+#endif /* __IP_SET_HASH_H */
index 2000b9ff14b661921022cac335e263e16e381f6f..90bfcc3d32433f1c85a4925fbecd29e728bbaf2b 100644 (file)
 #define __rot(x,k) (((x)<<(k)) | ((x)>>(32-(k))))
 
 /* __jhash_mix - mix 3 32-bit values reversibly. */
-#define __jhash_mix(a,b,c) \
-{ \
-  a -= c;  a ^= __rot(c, 4);  c += b; \
-  b -= a;  b ^= __rot(a, 6);  a += c; \
-  c -= b;  c ^= __rot(b, 8);  b += a; \
-  a -= c;  a ^= __rot(c,16);  c += b; \
-  b -= a;  b ^= __rot(a,19);  a += c; \
-  c -= b;  c ^= __rot(b, 4);  b += a; \
+#define __jhash_mix(a,b,c)             \
+{                                      \
+  a -= c;  a ^= __rot(c, 4);  c += b;  \
+  b -= a;  b ^= __rot(a, 6);  a += c;  \
+  c -= b;  c ^= __rot(b, 8);  b += a;  \
+  a -= c;  a ^= __rot(c,16);  c += b;  \
+  b -= a;  b ^= __rot(a,19);  a += c;  \
+  c -= b;  c ^= __rot(b, 4);  b += a;  \
 }
 
 /* __jhash_final - final mixing of 3 32-bit values (a,b,c) into c */
-#define __jhash_final(a,b,c) \
-{ \
-  c ^= b; c -= __rot(b,14); \
-  a ^= c; a -= __rot(c,11); \
-  b ^= a; b -= __rot(a,25); \
-  c ^= b; c -= __rot(b,16); \
-  a ^= c; a -= __rot(c,4);  \
-  b ^= a; b -= __rot(a,14); \
-  c ^= b; c -= __rot(b,24); \
+#define __jhash_final(a,b,c)           \
+{                                      \
+  c ^= b; c -= __rot(b,14);            \
+  a ^= c; a -= __rot(c,11);            \
+  b ^= a; b -= __rot(a,25);            \
+  c ^= b; c -= __rot(b,16);            \
+  a ^= c; a -= __rot(c,4);             \
+  b ^= a; b -= __rot(a,14);            \
+  c ^= b; c -= __rot(b,24);            \
 }
 
-/* The golden ration: an arbitrary value */
-#define JHASH_GOLDEN_RATIO     0xdeadbeef
+/* An arbitrary value */
+#define JHASH_RANDOM_PARAM     0xdeadbeef
 
 /* The most generic version, hashes an arbitrary sequence
  * of bytes.  No alignment or length assumptions are made about
@@ -61,7 +61,7 @@ static inline u32 jhash(const void *key, u32 length, u32 initval)
        const u8 *k = key;
 
        /* Set up the internal state */
-       a = b = c = JHASH_GOLDEN_RATIO + length + initval;
+       a = b = c = JHASH_RANDOM_PARAM + length + initval;
 
        /* all but the last block: affect some 32 bits of (a,b,c) */
        while (length > 12) {
@@ -104,7 +104,7 @@ static inline u32 jhash2(const u32 *k, u32 length, u32 initval)
        u32 a, b, c;
 
        /* Set up the internal state */
-       a = b = c = JHASH_GOLDEN_RATIO + (length<<2) + initval;
+       a = b = c = JHASH_RANDOM_PARAM + (length<<2) + initval;
 
        /* handle most of the key */
        while (length > 3) {
@@ -135,9 +135,9 @@ static inline u32 jhash2(const u32 *k, u32 length, u32 initval)
  */
 static inline u32 jhash_3words(u32 a, u32 b, u32 c, u32 initval)
 {
-       a += JHASH_GOLDEN_RATIO + initval;
-       b += JHASH_GOLDEN_RATIO + initval;
-       c += JHASH_GOLDEN_RATIO + initval;
+       a += JHASH_RANDOM_PARAM + initval;
+       b += JHASH_RANDOM_PARAM + initval;
+       c += JHASH_RANDOM_PARAM + initval;
 
        __jhash_final(a, b, c);
 
index 0ce9d3fd09d4a911a9dfcfc48cd4ed893d3b6596..3af8fce6a80e1d51350635b889f1802cc7407a60 100644 (file)
@@ -1,6 +1,6 @@
 /* Copyright (C) 2000-2002 Joakim Axelsson <gozem@linux.nu>
  *                         Patrick Schaaf <bof@bof.de>
- * Copyright (C) 2003-2004 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
+ * Copyright (C) 2003-2010 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
@@ -9,55 +9,65 @@
 
 /* Kernel module for IP set management */
 
-#include <linux/version.h>
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,19)
-#include <linux/config.h>
-#endif
+#include <linux/init.h>
 #include <linux/module.h>
 #include <linux/moduleparam.h>
-#include <linux/kmod.h>
+#include <linux/kernel.h>
 #include <linux/ip.h>
 #include <linux/skbuff.h>
-#include <linux/random.h>
-#include <linux/netfilter_ipv4/ip_set_jhash.h>
-#include <linux/errno.h>
-#include <linux/capability.h>
-#include <asm/uaccess.h>
-#include <asm/bitops.h>
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,27)
-#include <asm/semaphore.h>
-#else
-#include <linux/semaphore.h>
-#endif
 #include <linux/spinlock.h>
+#include <linux/netlink.h>
+#include <net/netlink.h>
 
-#define ASSERT_READ_LOCK(x)
-#define ASSERT_WRITE_LOCK(x)
 #include <linux/netfilter.h>
-#include <linux/netfilter_ipv4/ip_set.h>
+#include <linux/netfilter/nfnetlink.h>
+#include <linux/netfilter/ip_set.h>
+#include <linux/netfilter/ip_set_jhash.h>
 
-static struct list_head set_type_list;         /* all registered sets */
+static struct list_head ip_set_type_list;      /* all registered sets */
 static struct ip_set **ip_set_list;            /* all individual sets */
-static DEFINE_RWLOCK(ip_set_lock);             /* protects the lists and the hash */
-static struct semaphore ip_set_app_mutex;      /* serializes user access */
-static ip_set_id_t ip_set_max = CONFIG_IP_NF_SET_MAX;
-static int protocol_version = IP_SET_PROTOCOL_VERSION;
+static DEFINE_MUTEX(ip_set_type_mutex);                /* protects ip_set_type_list */
+static ip_set_id_t ip_set_max = CONFIG_IP_SET_MAX;
 
-#define STREQ(a,b)     (strncmp(a,b,IP_SET_MAXNAMELEN) == 0)
-#define DONT_ALIGN     (protocol_version == IP_SET_PROTOCOL_UNALIGNED)
-#define ALIGNED(len)   IPSET_VALIGN(len, DONT_ALIGN)
+#define STREQ(a,b)     (strncmp(a,b,IPSET_MAXNAMELEN) == 0)
+
+static int max_sets;
+
+module_param(max_sets, int, 0600);
+MODULE_PARM_DESC(max_sets, "maximal number of sets");
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
+MODULE_DESCRIPTION("core IP set support");
+MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_IPSET);
 
 /*
- * Sets are identified either by the index in ip_set_list or by id.
- * The id never changes. The index may change by swapping and used 
- * by external references (set/SET netfilter modules, etc.)
+ * The set types are implemented in modules and registered set types
+ * can be found in ip_set_type_list. Adding/deleting types is
+ * serialized by ip_set_type_list_lock/ip_set_type_list_unlock.
+ */
+
+static inline void
+ip_set_type_list_lock(void)
+{
+       mutex_lock(&ip_set_type_mutex);
+}
+
+static inline void
+ip_set_type_list_unlock(void)
+{
+       mutex_unlock(&ip_set_type_mutex);
+}
+
+/*
+ * Creating/destroying/renaming/swapping affect the existence and
+ * integrity of a set. All of these can be executed from userspace only
+ * and are serialized by nfnl_lock/nfnl_unlock indirectly via nfnetlink.
+ *
+ * Sets are identified by their index in ip_set_list and the index 
+ * is used by the external references (set/SET netfilter modules).
  *
- * Userspace requests are serialized by ip_set_mutex and sets can
- * be deleted only from userspace. Therefore ip_set_list locking
- * must obey the following rules:
+ * The set behind an index may change by swapping.
  *
- * - kernel requests: read and write locking mandatory
- * - user requests: read locking optional, write locking mandatory
  */
 
 static inline void
@@ -75,226 +85,165 @@ __ip_set_put(ip_set_id_t index)
 /* Add, del and test set entries from kernel */
 
 int
-ip_set_testip_kernel(ip_set_id_t index,
-                    const struct sk_buff *skb,
-                    const u_int32_t *flags)
+ip_set_test(ip_set_id_t index, const struct sk_buff *skb,
+           uint8_t family, const uint8_t *flags)
 {
        struct ip_set *set;
-       int res;
+       int ret = 0;
 
-       read_lock_bh(&ip_set_lock);
-       set = ip_set_list[index];
-       IP_SET_ASSERT(set);
-       DP("set %s, index %u", set->name, index);
+       rcu_read_lock();
+       set = rcu_dereference(ip_set_list[index]);
+       D("set %s, index %u", set->name, index);
 
        read_lock_bh(&set->lock);
-       res = set->type->testip_kernel(set, skb, flags);
+       ret = set->variant->kadt(set, skb, IPSET_TEST, family, flags);
        read_unlock_bh(&set->lock);
 
-       read_unlock_bh(&ip_set_lock);
+       if (ret == -EAGAIN) {
+               /* Type requests element to be re-added */
+               write_lock_bh(&set->lock);
+               set->variant->kadt(set, skb, IPSET_ADD, family, flags);
+               write_unlock_bh(&set->lock);
+               ret = 1;
+       }
+
+       rcu_read_unlock();
 
-       return (res < 0 ? 0 : res);
+       return (ret < 0 ? 0 : ret);
 }
 
 int
-ip_set_addip_kernel(ip_set_id_t index,
-                   const struct sk_buff *skb,
-                   const u_int32_t *flags)
+ip_set_add(ip_set_id_t index, const struct sk_buff *skb,
+          uint8_t family, const uint8_t *flags)
 {
        struct ip_set *set;
-       int res;
+       int ret = 0, retried = 0;
 
-   retry:
-       read_lock_bh(&ip_set_lock);
-       set = ip_set_list[index];
-       IP_SET_ASSERT(set);
-       DP("set %s, index %u", set->name, index);
+retry:
+       rcu_read_lock();
+       set = rcu_dereference(ip_set_list[index]);
+       D("set %s, index %u", set->name, index);
 
        write_lock_bh(&set->lock);
-       res = set->type->addip_kernel(set, skb, flags);
+       ret = set->variant->kadt(set, skb, IPSET_ADD, family, flags);
        write_unlock_bh(&set->lock);
 
-       read_unlock_bh(&ip_set_lock);
-       /* Retry function called without holding any lock */
-       if (res == -EAGAIN
-           && set->type->retry
-           && (res = set->type->retry(set)) == 0)
+       rcu_read_unlock();
+       /* Retry function must be called without holding any lock */
+       if (ret == -EAGAIN
+           && set->variant->resize
+           && (ret = set->variant->resize(set, retried++)) == 0)
                goto retry;
        
-       return res;
+       return ret;
 }
 
 int
-ip_set_delip_kernel(ip_set_id_t index,
-                   const struct sk_buff *skb,
-                   const u_int32_t *flags)
+ip_set_del(ip_set_id_t index, const struct sk_buff *skb,
+          uint8_t family, const uint8_t *flags)
 {
        struct ip_set *set;
-       int res;
+       int ret = 0;
 
-       read_lock_bh(&ip_set_lock);
-       set = ip_set_list[index];
-       IP_SET_ASSERT(set);
-       DP("set %s, index %u", set->name, index);
+       rcu_read_lock();
+       set = rcu_dereference(ip_set_list[index]);
+       D("set %s, index %u", set->name, index);
 
        write_lock_bh(&set->lock);
-       res = set->type->delip_kernel(set, skb, flags);
+       ret = set->variant->kadt(set, skb, IPSET_DEL, family, flags);
        write_unlock_bh(&set->lock);
 
-       read_unlock_bh(&ip_set_lock);
+       rcu_read_unlock();
        
-       return res;
+       return ret;
 }
 
 /* Register and deregister settype */
 
+#define family_name(f) ((f) == AF_INET ? "inet" : \
+                        (f) == AF_INET6 ? "inet6" : "any")
+
 static inline struct ip_set_type *
-find_set_type(const char *name)
+find_set_type(const char *name, uint8_t family, uint8_t revision)
 {
-       struct ip_set_type *set_type;
+       struct ip_set_type *type;
 
-       list_for_each_entry(set_type, &set_type_list, list)
-               if (STREQ(set_type->typename, name))
-                       return set_type;
+       list_for_each_entry(type, &ip_set_type_list, list)
+               if (STREQ(type->name, name)
+                   && (type->family == family || type->family == AF_UNSPEC)
+                   && type->revision == revision)
+                       return type;
        return NULL;
 }
 
 int
-ip_set_register_set_type(struct ip_set_type *set_type)
+ip_set_type_register(struct ip_set_type *type)
 {
        int ret = 0;
        
-       if (set_type->protocol_version != IP_SET_PROTOCOL_VERSION) {
-               ip_set_printk("'%s' uses wrong protocol version %u (want %u)",
-                             set_type->typename,
-                             set_type->protocol_version,
-                             IP_SET_PROTOCOL_VERSION);
+       if (type->protocol != IPSET_PROTOCOL) {
+               printk("set type %s, family %s, revision %u uses "
+                      "wrong protocol version %u (want %u)\n",
+                      type->name, family_name(type->family), type->revision,
+                      type->protocol, IPSET_PROTOCOL);
                return -EINVAL;
        }
 
-       write_lock_bh(&ip_set_lock);
-       if (find_set_type(set_type->typename)) {
+       ip_set_type_list_lock();
+       if (find_set_type(type->name, type->family, type->revision)) {
                /* Duplicate! */
-               ip_set_printk("'%s' already registered!",
-                             set_type->typename);
+               printk("type %s, family %s, revision %u already registered!\n",
+                      type->name, family_name(type->family), type->revision);
                ret = -EINVAL;
                goto unlock;
        }
-       if (!try_module_get(THIS_MODULE)) {
-               ret = -EFAULT;
-               goto unlock;
-       }
-       list_add(&set_type->list, &set_type_list);
-       DP("'%s' registered.", set_type->typename);
-   unlock:
-       write_unlock_bh(&ip_set_lock);
+       list_add(&type->list, &ip_set_type_list);
+       D("type %s, family %s, revision %u registered.",
+         type->name, family_name(type->family), type->revision);
+unlock:
+       ip_set_type_list_unlock();
        return ret;
 }
 
 void
-ip_set_unregister_set_type(struct ip_set_type *set_type)
+ip_set_type_unregister(struct ip_set_type *type)
 {
-       write_lock_bh(&ip_set_lock);
-       if (!find_set_type(set_type->typename)) {
-               ip_set_printk("'%s' not registered?",
-                             set_type->typename);
+       ip_set_type_list_lock();
+       if (!find_set_type(type->name, type->family, type->revision)) {
+               printk("type %s, family %s, revision %u not registered\n",
+                      type->name, family_name(type->family), type->revision);
                goto unlock;
        }
-       list_del(&set_type->list);
-       module_put(THIS_MODULE);
-       DP("'%s' unregistered.", set_type->typename);
-   unlock:
-       write_unlock_bh(&ip_set_lock);
-
+       list_del(&type->list);
+       D("type %s, family %s, revision %u unregistered.",
+         type->name, family_name(type->family), type->revision);
+unlock:
+       ip_set_type_list_unlock();
 }
 
-ip_set_id_t
-__ip_set_get_byname(const char *name, struct ip_set **set)
-{
-       ip_set_id_t i, index = IP_SET_INVALID_ID;
-       
-       for (i = 0; i < ip_set_max; i++) {
-               if (ip_set_list[i] != NULL
-                   && STREQ(ip_set_list[i]->name, name)) {
-                       __ip_set_get(i);
-                       index = i;
-                       *set = ip_set_list[i];
-                       break;
-               }
-       }
-       return index;
-}
-
-void
-__ip_set_put_byindex(ip_set_id_t index)
-{
-       if (ip_set_list[index])
-               __ip_set_put(index);
-}
-
-/*
- * Userspace routines
- */
+/* Get/put a set with referencing */
 
 /*
  * Find set by name, reference it once. The reference makes sure the
  * thing pointed to, does not go away under our feet. Drop the reference
- * later, using ip_set_put().
+ * later, using ip_set_put*().
  */
 ip_set_id_t
 ip_set_get_byname(const char *name)
 {
-       ip_set_id_t i, index = IP_SET_INVALID_ID;
+       ip_set_id_t i, index = IPSET_INVALID_ID;
        
-       down(&ip_set_app_mutex);
-       for (i = 0; i < ip_set_max; i++) {
-               if (ip_set_list[i] != NULL
-                   && STREQ(ip_set_list[i]->name, name)) {
+       nfnl_lock();    
+       for (i = 0; index == IPSET_INVALID_ID && i < ip_set_max; i++)
+               if (STREQ(ip_set_list[i]->name, name)) {
                        __ip_set_get(i);
                        index = i;
-                       break;
                }
-       }
-       up(&ip_set_app_mutex);
-       return index;
-}
-
-/*
- * Find set by index, reference it once. The reference makes sure the
- * thing pointed to, does not go away under our feet. Drop the reference
- * later, using ip_set_put().
- */
-ip_set_id_t
-ip_set_get_byindex(ip_set_id_t index)
-{
-       down(&ip_set_app_mutex);
+       nfnl_unlock();
 
-       if (index >= ip_set_max)
-               return IP_SET_INVALID_ID;
-       
-       if (ip_set_list[index])
-               __ip_set_get(index);
-       else
-               index = IP_SET_INVALID_ID;
-               
-       up(&ip_set_app_mutex);
        return index;
 }
 
-/*
- * Find the set id belonging to the index.
- * We are protected by the mutex, so we do not need to use
- * ip_set_lock. There is no need to reference the sets either.
- */
-ip_set_id_t
-ip_set_id(ip_set_id_t index)
-{
-       if (index >= ip_set_max || !ip_set_list[index])
-               return IP_SET_INVALID_ID;
-       
-       return ip_set_list[index]->id;
-}
-
 /*
  * If the given set pointer points to a valid set, decrement
  * reference count by 1. The caller shall not assume the index
@@ -303,1227 +252,1047 @@ ip_set_id(ip_set_id_t index)
 void
 ip_set_put_byindex(ip_set_id_t index)
 {
-       down(&ip_set_app_mutex);
+       nfnl_lock();
        if (ip_set_list[index])
                __ip_set_put(index);
-       up(&ip_set_app_mutex);
+       nfnl_unlock();
 }
 
-/* Find a set by name or index */
 static ip_set_id_t
-ip_set_find_byname(const char *name)
+find_set_id(const char *name)
 {
-       ip_set_id_t i, index = IP_SET_INVALID_ID;
+       ip_set_id_t i, index = IPSET_INVALID_ID;
        
-       for (i = 0; i < ip_set_max; i++) {
+       for (i = 0; index == IPSET_INVALID_ID && i < ip_set_max; i++) {
                if (ip_set_list[i] != NULL
-                   && STREQ(ip_set_list[i]->name, name)) {
+                   && STREQ(ip_set_list[i]->name, name))
                        index = i;
-                       break;
-               }
        }
        return index;
 }
 
 static ip_set_id_t
-ip_set_find_byindex(ip_set_id_t index)
+find_set_id_rcu(const char *name)
 {
-       if (index >= ip_set_max || ip_set_list[index] == NULL)
-               index = IP_SET_INVALID_ID;
+       ip_set_id_t i, index = IPSET_INVALID_ID;
+       struct ip_set *set;
        
+       for (i = 0; index == IPSET_INVALID_ID && i < ip_set_max; i++) {
+               set = rcu_dereference(ip_set_list[i]);
+               if (set != NULL && STREQ(set->name, name))
+                       index = i;
+       }
        return index;
 }
 
-/*
- * Add, del and test
- */
-
-static int
-ip_set_addip(struct ip_set *set, const void *data, u_int32_t size)
+static struct ip_set *
+find_set(const char *name)
 {
-       int res;
-       
-       IP_SET_ASSERT(set);
-       do {
-               write_lock_bh(&set->lock);
-               res = set->type->addip(set, data, size);
-               write_unlock_bh(&set->lock);
-       } while (res == -EAGAIN
-                && set->type->retry
-                && (res = set->type->retry(set)) == 0);
+       ip_set_id_t index = find_set_id(name);
 
-       return res;
+       return index == IPSET_INVALID_ID ? NULL : ip_set_list[index];
 }
 
-static int
-ip_set_delip(struct ip_set *set, const void *data, u_int32_t size)
-{
-       int res;
-       
-       IP_SET_ASSERT(set);
-
-       write_lock_bh(&set->lock);
-       res = set->type->delip(set, data, size);
-       write_unlock_bh(&set->lock);
+/* Communication protocol with userspace over netlink */
+
+/* Create a set */
+
+static const struct nla_policy
+ip_set_create_policy[IPSET_ATTR_CMD_MAX + 1] __read_mostly = {
+       [IPSET_ATTR_PROTOCOL]   = { .type = NLA_U8 },
+       [IPSET_ATTR_SETNAME]    = { .type = NLA_STRING,
+                                   .len = IPSET_MAXNAMELEN },
+       [IPSET_ATTR_TYPENAME]   = { .type = NLA_STRING,
+                                   .len = IPSET_MAXNAMELEN },
+       [IPSET_ATTR_REVISION]   = { .type = NLA_U8 },
+       [IPSET_ATTR_FAMILY]     = { .type = NLA_U8 },
+       [IPSET_ATTR_LINENO]     = { .type = NLA_U32 },
+       [IPSET_ATTR_DATA]       = { .type = NLA_NESTED },
+};
 
-       return res;
+static inline bool
+protocol_failed(const struct nlattr * const tb[])
+{
+       return !tb[IPSET_ATTR_PROTOCOL]
+              || nla_get_u8(tb[IPSET_ATTR_PROTOCOL]) != IPSET_PROTOCOL;
 }
 
-static int
-ip_set_testip(struct ip_set *set, const void *data, u_int32_t size)
+static inline uint32_t
+flag_exist(const struct nlmsghdr *nlh)
 {
-       int res;
-
-       IP_SET_ASSERT(set);
-       
-       read_lock_bh(&set->lock);
-       res = set->type->testip(set, data, size);
-       read_unlock_bh(&set->lock);
+       return nlh->nlmsg_flags & NLM_F_EXCL ? 0 : IPSET_FLAG_EXIST;
+}
 
-       return (res > 0 ? -EEXIST : res);
+static inline bool
+flag_nested(const struct nlattr *nla)
+{
+       return nla->nla_type & NLA_F_NESTED;
 }
 
 static struct ip_set_type *
-find_set_type_rlock(const char *typename)
+find_set_type_lock(const char *name, uint8_t family, uint8_t revision)
 {
        struct ip_set_type *type;
        
-       read_lock_bh(&ip_set_lock);
-       type = find_set_type(typename);
+       ip_set_type_list_lock();
+       type = find_set_type(name, family, revision);
        if (type == NULL)
-               read_unlock_bh(&ip_set_lock);
+               ip_set_type_list_unlock();
 
        return type;
 }
 
 static int
-find_free_id(const char *name,
-            ip_set_id_t *index,
-            ip_set_id_t *id)
+find_free_id(const char *name, ip_set_id_t *index, struct ip_set **set)
 {
        ip_set_id_t i;
 
-       *id = IP_SET_INVALID_ID;
+       *index = IPSET_INVALID_ID;
        for (i = 0;  i < ip_set_max; i++) {
                if (ip_set_list[i] == NULL) {
-                       if (*id == IP_SET_INVALID_ID)
-                               *id = *index = i;
-               } else if (STREQ(name, ip_set_list[i]->name))
+                       if (*index == IPSET_INVALID_ID)
+                               *index = i;
+               } else if (STREQ(name, ip_set_list[i]->name)) {
                        /* Name clash */
+                       *set = ip_set_list[i];
                        return -EEXIST;
-       }
-       if (*id == IP_SET_INVALID_ID)
-               /* No free slot remained */
-               return -ERANGE;
-       /* Check that index is usable as id (swapping) */
-    check:     
-       for (i = 0;  i < ip_set_max; i++) {
-               if (ip_set_list[i] != NULL
-                   && ip_set_list[i]->id == *id) {
-                   *id = i;
-                   goto check;
                }
        }
+       if (*index == IPSET_INVALID_ID)
+               /* No free slot remained */
+               return -IPSET_ERR_MAX_SETS;
        return 0;
 }
 
-/*
- * Create a set
- */
-static int
-ip_set_create(const char *name,
-             const char *typename,
-             ip_set_id_t restore,
-             const void *data,
-             u_int32_t size)
+static struct nlmsghdr *
+start_msg(struct sk_buff *skb, u32 pid, u32 seq, unsigned int flags,
+         enum ipset_cmd cmd)
 {
-       struct ip_set *set;
-       ip_set_id_t index = 0, id;
-       int res = 0;
+       struct nlmsghdr *nlh;
+       struct nfgenmsg *nfmsg;
+
+       nlh = nlmsg_put(skb, pid, seq, cmd | (NFNL_SUBSYS_IPSET << 8),
+                       sizeof(*nfmsg), flags);
+       if (nlh == NULL)
+               return NULL;
+
+       nfmsg = nlmsg_data(nlh);
+       nfmsg->nfgen_family = AF_INET;
+       nfmsg->version = NFNETLINK_V0;
+       nfmsg->res_id = 0;
+       
+       return nlh;
+}
+
+static inline void
+load_type_module(const char *typename)
+{
+       D("try to load ip_set_%s", typename);
+       request_module("ip_set_%s", typename);
+}
 
-       DP("setname: %s, typename: %s, id: %u", name, typename, restore);
+static int
+ip_set_create(struct sock *ctnl, struct sk_buff *skb,
+             const struct nlmsghdr *nlh,
+             const struct nlattr * const attr[])
+{
+       struct ip_set *set, *clash;
+       ip_set_id_t index = IPSET_INVALID_ID;
+       const char *name, *typename;
+       uint8_t family, revision;
+       uint32_t flags = flag_exist(nlh);
+       int ret = 0, len;
+
+       if (unlikely(protocol_failed(attr)      
+                    || attr[IPSET_ATTR_SETNAME] == NULL
+                    || attr[IPSET_ATTR_TYPENAME] == NULL
+                    || attr[IPSET_ATTR_REVISION] == NULL
+                    || attr[IPSET_ATTR_FAMILY] == NULL
+                    || (attr[IPSET_ATTR_DATA] != NULL
+                        && !flag_nested(attr[IPSET_ATTR_DATA]))))
+               return -IPSET_ERR_PROTOCOL;
+
+       name = nla_data(attr[IPSET_ATTR_SETNAME]);
+       typename = nla_data(attr[IPSET_ATTR_TYPENAME]);
+       family = nla_get_u8(attr[IPSET_ATTR_FAMILY]);
+       revision = nla_get_u8(attr[IPSET_ATTR_REVISION]);
+       D("setname: %s, typename: %s, family: %s, revision: %u",
+         name, typename, family_name(family), revision);
 
        /*
         * First, and without any locks, allocate and initialize
         * a normal base set structure.
         */
-       set = kmalloc(sizeof(struct ip_set), GFP_KERNEL);
+       set = kzalloc(sizeof(struct ip_set), GFP_KERNEL);
        if (!set)
                return -ENOMEM;
        rwlock_init(&set->lock);
-       strncpy(set->name, name, IP_SET_MAXNAMELEN);
+       strncpy(set->name, name, IPSET_MAXNAMELEN);
        atomic_set(&set->ref, 0);
 
        /*
-        * Next, take the &ip_set_lock, check that we know the type,
-        * and take a reference on the type, to make sure it
-        * stays available while constructing our new set.
+        * Next, check that we know the type, and take
+        * a reference on the type, to make sure it stays available
+        * while constructing our new set.
         *
-        * After referencing the type, we drop the &ip_set_lock,
-        * and let the new set construction run without locks.
+        * After referencing the type, we try to create the type
+        * specific part of the set without holding any locks.
         */
-       set->type = find_set_type_rlock(typename);
+       set->type = find_set_type_lock(typename, family, revision);
        if (set->type == NULL) {
                /* Try loading the module */
-               char modulename[IP_SET_MAXNAMELEN + strlen("ip_set_") + 1];
-               strcpy(modulename, "ip_set_");
-               strcat(modulename, typename);
-               DP("try to load %s", modulename);
-               request_module(modulename);
-               set->type = find_set_type_rlock(typename);
-       }
-       if (set->type == NULL) {
-               ip_set_printk("no set type '%s', set '%s' not created",
-                             typename, name);
-               res = -ENOENT;
-               goto out;
+               load_type_module(typename);
+               set->type = find_set_type_lock(typename, family, revision);
+               if (set->type == NULL) {
+                       printk("Can't find type %s, family %s, revision %u:"
+                              " set '%s' not created",
+                              typename, family_name(family), revision, name);
+                       ret = -IPSET_ERR_FIND_TYPE;
+                       goto out;
+               }
        }
        if (!try_module_get(set->type->me)) {
-               read_unlock_bh(&ip_set_lock);
-               res = -EFAULT;
+               ip_set_type_list_unlock();
+               ret = -EFAULT;
                goto out;
        }
-       read_unlock_bh(&ip_set_lock);
-
-       /* Check request size */
-       if (size != set->type->header_size) {
-               ip_set_printk("data length wrong (want %lu, have %lu)",
-                             (long unsigned)set->type->header_size,
-                             (long unsigned)size);
-               goto put_out;
-       }
+       ip_set_type_list_unlock();
 
        /*
         * Without holding any locks, create private part.
         */
-       res = set->type->create(set, data, size);
-       if (res != 0)
+       len = attr[IPSET_ATTR_DATA] ? nla_len(attr[IPSET_ATTR_DATA]) : 0;
+       D("data len: %u", len);
+       ret = set->type->create(set, attr[IPSET_ATTR_DATA] ?
+                               nla_data(attr[IPSET_ATTR_DATA]) : NULL, len,
+                               flags);
+       if (ret != 0)
                goto put_out;
 
-       /* BTW, res==0 here. */
+       /* BTW, ret==0 here. */
 
        /*
-        * Here, we have a valid, constructed set. &ip_set_lock again,
-        * find free id/index and check that it is not already in
-        * ip_set_list.
+        * Here, we have a valid, constructed set and we are protected
+        * by nfnl_lock. Find the first free index in ip_set_list and
+        * check clashing.
         */
-       write_lock_bh(&ip_set_lock);
-       if ((res = find_free_id(set->name, &index, &id)) != 0) {
-               DP("no free id!");
+       if ((ret = find_free_id(set->name, &index, &clash)) != 0) {
+               /* If this is the same set and requested, ignore error */
+               if (ret == -EEXIST
+                   && (flags & IPSET_FLAG_EXIST)
+                   && STREQ(set->type->name, clash->type->name)
+                   && set->type->family == clash->type->family
+                   && set->type->revision == clash->type->revision)
+                       ret = 0;
                goto cleanup;
        }
 
-       /* Make sure restore gets the same index */
-       if (restore != IP_SET_INVALID_ID && index != restore) {
-               DP("Can't restore, sets are screwed up");
-               res = -ERANGE;
-               goto cleanup;
-       }
-       
        /*
         * Finally! Add our shiny new set to the list, and be done.
         */
-       DP("create: '%s' created with index %u, id %u!", set->name, index, id);
-       set->id = id;
+       D("create: '%s' created with index %u!", set->name, index);
        ip_set_list[index] = set;
-       write_unlock_bh(&ip_set_lock);
-       return res;
+
+       return ret;
        
-    cleanup:
-       write_unlock_bh(&ip_set_lock);
-       set->type->destroy(set);
-    put_out:
+cleanup:
+       set->variant->destroy(set);
+put_out:
        module_put(set->type->me);
-    out:
+out:
        kfree(set);
-       return res;
+       return ret;
 }
 
-/*
- * Destroy a given existing set
- */
-static void
+/* Destroy sets */
+
+static const struct nla_policy
+ip_set_setname_policy[IPSET_ATTR_CMD_MAX + 1] __read_mostly = {
+       [IPSET_ATTR_PROTOCOL]   = { .type = NLA_U8 },
+       [IPSET_ATTR_SETNAME]    = { .type = NLA_STRING,
+                                   .len = IPSET_MAXNAMELEN },
+};
+
+static inline void
 ip_set_destroy_set(ip_set_id_t index)
 {
        struct ip_set *set = ip_set_list[index];
 
-       IP_SET_ASSERT(set);
-       DP("set: %s",  set->name);
-       write_lock_bh(&ip_set_lock);
+       D("set: %s",  set->name);
        ip_set_list[index] = NULL;
-       write_unlock_bh(&ip_set_lock);
 
        /* Must call it without holding any lock */
-       set->type->destroy(set);
+       set->variant->destroy(set);
        module_put(set->type->me);
        kfree(set);
 }
 
-/*
- * Destroy a set - or all sets
- * Sets must not be referenced/used.
- */
 static int
-ip_set_destroy(ip_set_id_t index)
+ip_set_destroy(struct sock *ctnl, struct sk_buff *skb,
+              const struct nlmsghdr *nlh,
+              const struct nlattr * const attr[])
 {
        ip_set_id_t i;
+       
+       if (unlikely(protocol_failed(attr)))
+               return -IPSET_ERR_PROTOCOL;
 
-       /* ref modification always protected by the mutex */
-       if (index != IP_SET_INVALID_ID) {
-               if (atomic_read(&ip_set_list[index]->ref))
-                       return -EBUSY;
-               ip_set_destroy_set(index);
-       } else {
+       /* References are protected by the nfnl mutex */
+       if (!attr[IPSET_ATTR_SETNAME]) {
                for (i = 0; i < ip_set_max; i++) {
                        if (ip_set_list[i] != NULL
                            && (atomic_read(&ip_set_list[i]->ref)))
-                               return -EBUSY;
+                               return -IPSET_ERR_BUSY;
                }
-
                for (i = 0; i < ip_set_max; i++) {
                        if (ip_set_list[i] != NULL)
                                ip_set_destroy_set(i);
                }
+       } else {
+               i = find_set_id(nla_data(attr[IPSET_ATTR_SETNAME]));
+               if (i == IPSET_INVALID_ID)
+                       return -EEXIST;
+               else if (atomic_read(&ip_set_list[i]->ref))
+                       return -IPSET_ERR_BUSY;
+
+               ip_set_destroy_set(i);
        }
        return 0;
 }
 
-static void
+/* Flush sets */
+
+static inline void
 ip_set_flush_set(struct ip_set *set)
 {
-       DP("set: %s %u",  set->name, set->id);
+       D("set: %s",  set->name);
 
        write_lock_bh(&set->lock);
-       set->type->flush(set);
+       set->variant->flush(set);
        write_unlock_bh(&set->lock);
 }
 
-/*
- * Flush data in a set - or in all sets
- */
 static int
-ip_set_flush(ip_set_id_t index)
+ip_set_flush(struct sock *ctnl, struct sk_buff *skb,
+            const struct nlmsghdr *nlh,
+            const struct nlattr * const attr[])
 {
-       if (index != IP_SET_INVALID_ID) {
-               IP_SET_ASSERT(ip_set_list[index]);
-               ip_set_flush_set(ip_set_list[index]);
-       } else {
-               ip_set_id_t i;
-               
+       ip_set_id_t i;
+
+       if (unlikely(protocol_failed(attr)))
+               return -EPROTO;
+
+       if (!attr[IPSET_ATTR_SETNAME]) {
                for (i = 0; i < ip_set_max; i++)
                        if (ip_set_list[i] != NULL)
                                ip_set_flush_set(ip_set_list[i]);
+       } else {
+               i = find_set_id(nla_data(attr[IPSET_ATTR_SETNAME]));
+               if (i == IPSET_INVALID_ID)
+                       return -EEXIST;
+
+               ip_set_flush_set(ip_set_list[i]);
        }
 
        return 0;
 }
 
 /* Rename a set */
+
+static const struct nla_policy
+ip_set_setname2_policy[IPSET_ATTR_CMD_MAX + 1] __read_mostly = {
+       [IPSET_ATTR_PROTOCOL]   = { .type = NLA_U8 },
+       [IPSET_ATTR_SETNAME]    = { .type = NLA_STRING,
+                                   .len = IPSET_MAXNAMELEN },
+       [IPSET_ATTR_SETNAME2]   = { .type = NLA_STRING,
+                                   .len = IPSET_MAXNAMELEN },
+};
+
 static int
-ip_set_rename(ip_set_id_t index, const char *name)
+ip_set_rename(struct sock *ctnl, struct sk_buff *skb,
+             const struct nlmsghdr *nlh,
+             const struct nlattr * const attr[])
 {
-       struct ip_set *set = ip_set_list[index];
+       struct ip_set *set;
+       const char *name2;
        ip_set_id_t i;
-       int res = 0;
 
-       DP("set: %s to %s",  set->name, name);
-       write_lock_bh(&ip_set_lock);
+       if (unlikely(protocol_failed(attr)
+                    || attr[IPSET_ATTR_SETNAME] == NULL
+                    || attr[IPSET_ATTR_SETNAME2] == NULL))
+               return -IPSET_ERR_PROTOCOL;
+
+       set = find_set(nla_data(attr[IPSET_ATTR_SETNAME]));
+       if (set == NULL)
+               return -EEXIST;
+
+       name2 = nla_data(attr[IPSET_ATTR_SETNAME2]);
        for (i = 0; i < ip_set_max; i++) {
                if (ip_set_list[i] != NULL
-                   && STREQ(ip_set_list[i]->name, name)) {
-                       res = -EEXIST;
-                       goto unlock;
-               }
+                   && STREQ(ip_set_list[i]->name, name2))
+                       return -IPSET_ERR_EXIST_SETNAME2;
        }
-       strncpy(set->name, name, IP_SET_MAXNAMELEN);
-    unlock:
-       write_unlock_bh(&ip_set_lock);
-       return res;
+       strncpy(set->name, name2, IPSET_MAXNAMELEN);
+
+       return 0;
 }
 
-/*
- * Swap two sets so that name/index points to the other.
- * References are also swapped.
- */
+/* Swap two sets so that name/index points to the other.
+ * References are also swapped. */
+
 static int
-ip_set_swap(ip_set_id_t from_index, ip_set_id_t to_index)
+ip_set_swap(struct sock *ctnl, struct sk_buff *skb,
+           const struct nlmsghdr *nlh,
+           const struct nlattr * const attr[])
 {
-       struct ip_set *from = ip_set_list[from_index];
-       struct ip_set *to = ip_set_list[to_index];
-       char from_name[IP_SET_MAXNAMELEN];
-       u_int32_t from_ref;
+       struct ip_set *from, *to;
+       ip_set_id_t from_id, to_id;
+       char from_name[IPSET_MAXNAMELEN];
+       uint32_t from_ref;
+       
+       if (unlikely(protocol_failed(attr)
+                    || attr[IPSET_ATTR_SETNAME] == NULL
+                    || attr[IPSET_ATTR_SETNAME2] == NULL))
+               return -IPSET_ERR_PROTOCOL;
 
-       DP("set: %s to %s",  from->name, to->name);
-       /* Features must not change. 
+       from_id = find_set_id(nla_data(attr[IPSET_ATTR_SETNAME]));
+       if (from_id == IPSET_INVALID_ID)
+               return -EEXIST;
+
+       to_id = find_set_id(nla_data(attr[IPSET_ATTR_SETNAME2]));
+       if (to_id == IPSET_INVALID_ID)
+               return -IPSET_ERR_EXIST_SETNAME2;
+
+       from = ip_set_list[from_id];
+       to = ip_set_list[to_id];
+       
+       /* Features must not change.
         * Not an artifical restriction anymore, as we must prevent
         * possible loops created by swapping in setlist type of sets. */
-       if (from->type->features != to->type->features)
-               return -ENOEXEC;
+       if (!(from->type->features == to->type->features
+             && from->type->family == to->type->family))
+               return -IPSET_ERR_TYPE_MISMATCH;
 
        /* No magic here: ref munging protected by the mutex */ 
-       write_lock_bh(&ip_set_lock);
-       strncpy(from_name, from->name, IP_SET_MAXNAMELEN);
+       strncpy(from_name, from->name, IPSET_MAXNAMELEN);
        from_ref = atomic_read(&from->ref);
 
-       strncpy(from->name, to->name, IP_SET_MAXNAMELEN);
+       strncpy(from->name, to->name, IPSET_MAXNAMELEN);
        atomic_set(&from->ref, atomic_read(&to->ref));
-       strncpy(to->name, from_name, IP_SET_MAXNAMELEN);
+       strncpy(to->name, from_name, IPSET_MAXNAMELEN);
        atomic_set(&to->ref, from_ref);
        
-       ip_set_list[from_index] = to;
-       ip_set_list[to_index] = from;
-       
-       write_unlock_bh(&ip_set_lock);
+       rcu_assign_pointer(ip_set_list[from_id], to);
+       rcu_assign_pointer(ip_set_list[to_id], from);
+       synchronize_rcu();
+
        return 0;
 }
 
-/*
- * List set data
- */
+/* List/save set data */
 
 static int
-ip_set_list_set(ip_set_id_t index, void *data, int *used, int len)
+ip_set_dump_done(struct netlink_callback *cb)
 {
-       struct ip_set *set = ip_set_list[index];
-       struct ip_set_list *set_list;
-
-       /* Pointer to our header */
-       set_list = data + *used;
-
-       DP("set: %s, used: %d  len %u %p %p", set->name, *used, len, data, data + *used);
-
-       /* Get and ensure header size */
-       if (*used + ALIGNED(sizeof(struct ip_set_list)) > len)
-               goto not_enough_mem;
-       *used += ALIGNED(sizeof(struct ip_set_list));
-
-       read_lock_bh(&set->lock);
-       /* Get and ensure set specific header size */
-       set_list->header_size = ALIGNED(set->type->header_size);
-       if (*used + set_list->header_size > len)
-               goto unlock_set;
-
-       /* Fill in the header */
-       set_list->index = index;
-       set_list->binding = IP_SET_INVALID_ID;
-       set_list->ref = atomic_read(&set->ref);
-
-       /* Fill in set spefific header data */
-       set->type->list_header(set, data + *used);
-       *used += set_list->header_size;
-
-       /* Get and ensure set specific members size */
-       set_list->members_size = set->type->list_members_size(set, DONT_ALIGN);
-       if (*used + set_list->members_size > len)
-               goto unlock_set;
-
-       /* Fill in set spefific members data */
-       set->type->list_members(set, data + *used, DONT_ALIGN);
-       *used += set_list->members_size;
-       read_unlock_bh(&set->lock);
-
-       /* Bindings */
-       set_list->bindings_size = 0;
-
+       if (cb->args[2])
+               __ip_set_put((ip_set_id_t) cb->args[1]);
        return 0;
+}
 
-    unlock_set:
-       read_unlock_bh(&set->lock);
-    not_enough_mem:
-       DP("not enough mem, try again");
-       return -EAGAIN;
+static inline void
+dump_attrs(struct nlmsghdr *nlh)
+{
+       struct nlattr *attr;
+       int rem;
+
+       D("dump nlmsg");        
+       nlmsg_for_each_attr(attr, nlh, sizeof(struct nfgenmsg), rem) {
+               D("type: %u, len %u", nla_type(attr), attr->nla_len);
+       }
 }
 
-/*
- * Save sets
- */
-static inline int
-ip_set_save_marker(void *data, int *used, int len)
+static int
+ip_set_dump_start(struct sk_buff *skb, struct netlink_callback *cb)
 {
-       struct ip_set_save *set_save;
+       ip_set_id_t index = IPSET_INVALID_ID, max;
+       struct ip_set *set = NULL;
+       struct nlmsghdr *nlh = NULL;
+       unsigned int flags = NETLINK_CB(cb->skb).pid ? NLM_F_MULTI : 0;
+       int ret = 0;
 
-       DP("used %u, len %u", *used, len);
-       /* Get and ensure header size */
-       if (*used + ALIGNED(sizeof(struct ip_set_save)) > len)
-               return -ENOMEM;
+       max = cb->args[0] ? cb->args[1] + 1 : ip_set_max;
+       rcu_read_lock();
+       for (; cb->args[1] < max; cb->args[1]++) {
+               index = (ip_set_id_t) cb->args[1];
+               set = rcu_dereference(ip_set_list[index]);
+               if (set == NULL) {
+                       if (cb->args[0]) {
+                               ret = -EEXIST;
+                               goto unlock;
+                       }
+                       continue;
+               }
+               D("List set: %s", set->name);
+               if (!cb->args[2]) {
+                       /* Start listing: make sure set won't be destroyed */
+                       D("reference set");
+                       __ip_set_get(index);
+               }
+               nlh = start_msg(skb, NETLINK_CB(cb->skb).pid,
+                               cb->nlh->nlmsg_seq, flags,
+                               IPSET_CMD_LIST);
+               if (!nlh) {
+                       ret = -EFAULT;
+                       goto release_refcount;
+               }
+               NLA_PUT_U8(skb, IPSET_ATTR_PROTOCOL, IPSET_PROTOCOL);
+               NLA_PUT_STRING(skb, IPSET_ATTR_SETNAME, set->name);
+               switch (cb->args[2]) {
+               case 0:
+                       /* Core header data */
+                       NLA_PUT_STRING(skb, IPSET_ATTR_TYPENAME,
+                                      set->type->name);
+                       NLA_PUT_U8(skb, IPSET_ATTR_FAMILY,
+                                  set->type->family);
+                       NLA_PUT_U8(skb, IPSET_ATTR_REVISION,
+                                  set->type->revision);
+                       ret = set->variant->head(set, skb);
+                       if (ret < 0)
+                               goto release_refcount;
+                       /* Fall through and add elements */
+               default:
+                       read_lock_bh(&set->lock);
+                       ret = set->variant->list(set, skb, cb);
+                       read_unlock_bh(&set->lock);
+                       if (!cb->args[2])
+                               /* Set is done, proceed with next one */
+                               cb->args[1]++;
+                       goto release_refcount;
+               }
+       }
+       goto unlock;
+
+nla_put_failure:
+       ret = -EFAULT;
+release_refcount:
+       /* If there was an error or set is done, release set */
+       if (ret || !cb->args[2]) {
+               D("release set");
+               __ip_set_put(index);
+       }
+unlock:
+       rcu_read_unlock();
 
-       /* Marker: just for backward compatibility */
-       set_save = data + *used;
-       set_save->index = IP_SET_INVALID_ID;
-       set_save->header_size = 0;
-       set_save->members_size = 0;
-       *used += ALIGNED(sizeof(struct ip_set_save));
+       if (nlh) {
+               nlmsg_end(skb, nlh);
+               D("nlmsg_len: %u", nlh->nlmsg_len);
+               dump_attrs(nlh);
+       }
        
-       return 0;
+       return ret < 0 ? ret : skb->len;
 }
 
 static int
-ip_set_save_set(ip_set_id_t index, void *data, int *used, int len)
+ip_set_dump(struct sock *ctnl, struct sk_buff *skb,
+           const struct nlmsghdr *nlh,
+           const struct nlattr * const attr[])
 {
-       struct ip_set *set;
-       struct ip_set_save *set_save;
-
-       /* Pointer to our header */
-       set_save = data + *used;
+       ip_set_id_t index;
+       
+       if (unlikely(protocol_failed(attr)))
+               return -IPSET_ERR_PROTOCOL;
+
+       if (!attr[IPSET_ATTR_SETNAME])
+               return netlink_dump_start(ctnl, skb, nlh,
+                                         ip_set_dump_start,
+                                         ip_set_dump_done);
+
+       rcu_read_lock();
+       index = find_set_id_rcu(nla_data(attr[IPSET_ATTR_SETNAME]));
+       if (index == IPSET_INVALID_ID) {
+               rcu_read_unlock();
+               return -EEXIST;
+       }
+       rcu_read_unlock();
 
-       /* Get and ensure header size */
-       if (*used + ALIGNED(sizeof(struct ip_set_save)) > len)
-               goto not_enough_mem;
-       *used += ALIGNED(sizeof(struct ip_set_save));
+       /* cb->args[0] : 1 => dump single set,
+        *             : 0 => dump all sets
+        *         [1] : set index
+        *         [..]: type specific
+        */
+       return netlink_dump_init(ctnl, skb, nlh,
+                                ip_set_dump_start,
+                                ip_set_dump_done,
+                                2, 1, index);
+}
 
-       set = ip_set_list[index];
-       DP("set: %s, used: %d(%d) %p %p", set->name, *used, len,
-          data, data + *used);
+/* Add, del and test */
 
-       read_lock_bh(&set->lock);
-       /* Get and ensure set specific header size */
-       set_save->header_size = ALIGNED(set->type->header_size);
-       if (*used + set_save->header_size > len)
-               goto unlock_set;
-
-       /* Fill in the header */
-       set_save->index = index;
-       set_save->binding = IP_SET_INVALID_ID;
-
-       /* Fill in set spefific header data */
-       set->type->list_header(set, data + *used);
-       *used += set_save->header_size;
-
-       DP("set header filled: %s, used: %d(%lu) %p %p", set->name, *used,
-          (unsigned long)set_save->header_size, data, data + *used);
-       /* Get and ensure set specific members size */
-       set_save->members_size = set->type->list_members_size(set, DONT_ALIGN);
-       if (*used + set_save->members_size > len)
-               goto unlock_set;
-
-       /* Fill in set spefific members data */
-       set->type->list_members(set, data + *used, DONT_ALIGN);
-       *used += set_save->members_size;
-       read_unlock_bh(&set->lock);
-       DP("set members filled: %s, used: %d(%lu) %p %p", set->name, *used,
-          (unsigned long)set_save->members_size, data, data + *used);
-       return 0;
+static const struct nla_policy
+ip_set_adt_policy[IPSET_ATTR_CMD_MAX + 1] __read_mostly = {
+       [IPSET_ATTR_PROTOCOL]   = { .type = NLA_U8 },
+       [IPSET_ATTR_SETNAME]    = { .type = NLA_STRING,
+                                   .len = IPSET_MAXNAMELEN },
+       [IPSET_ATTR_LINENO]     = { .type = NLA_U32 },
+       [IPSET_ATTR_DATA]       = { .type = NLA_NESTED },
+       [IPSET_ATTR_ADT]        = { .type = NLA_NESTED },
+};
 
-    unlock_set:
-       read_unlock_bh(&set->lock);
-    not_enough_mem:
-       DP("not enough mem, try again");
-       return -EAGAIN;
+static int
+call_ad(struct sock *ctnl, struct sk_buff *skb,
+       const struct nlattr * const attr[],
+       struct ip_set *set, const struct nlattr *nla,
+       enum ipset_adt adt, uint32_t flags)
+{
+       struct nlattr *head = nla_data(nla);
+       int ret, len = nla_len(nla), retried = 0;
+       uint32_t lineno = 0;
+       bool eexist = flags & IPSET_FLAG_EXIST;
+       
+       do {
+               write_lock_bh(&set->lock);
+               ret = set->variant->uadt(set, head, len, adt,
+                                        &lineno, flags);
+               write_unlock_bh(&set->lock);
+       } while (ret == -EAGAIN
+                && set->variant->resize
+                && (ret = set->variant->resize(set, retried++)) == 0);
+
+       if (!ret || (ret == -IPSET_ERR_EXIST && eexist))
+               return 0;
+       if (lineno && attr[IPSET_ATTR_LINENO]) {
+               /* Error in restore/batch mode: send back lineno */
+               uint32_t *errline = nla_data(attr[IPSET_ATTR_LINENO]);
+               
+               *errline = lineno;
+       }
+       
+       return ret;
 }
 
-/*
- * Restore sets
- */
 static int
-ip_set_restore(void *data, int len)
+ip_set_uadd(struct sock *ctnl, struct sk_buff *skb,
+           const struct nlmsghdr *nlh,
+           const struct nlattr * const attr[])
 {
-       int res = 0;
-       int line = 0, used = 0, members_size;
        struct ip_set *set;
-       struct ip_set_restore *set_restore;
-       ip_set_id_t index;
+       const struct nlattr *nla;
+       uint32_t flags = flag_exist(nlh);
+       int ret = 0;
 
-       /* Loop to restore sets */
-       while (1) {
-               line++;
-               
-               DP("%d %zu %d", used, ALIGNED(sizeof(struct ip_set_restore)), len);
-               /* Get and ensure header size */
-               if (used + ALIGNED(sizeof(struct ip_set_restore)) > len)
-                       return line;
-               set_restore = data + used;
-               used += ALIGNED(sizeof(struct ip_set_restore));
-
-               /* Ensure data size */
-               if (used
-                   + set_restore->header_size
-                   + set_restore->members_size > len)
-                       return line;
-
-               /* Check marker */
-               if (set_restore->index == IP_SET_INVALID_ID) {
-                       line--;
-                       goto finish;
-               }
-               
-               /* Try to create the set */
-               DP("restore %s %s", set_restore->name, set_restore->typename);
-               res = ip_set_create(set_restore->name,
-                                   set_restore->typename,
-                                   set_restore->index,
-                                   data + used,
-                                   set_restore->header_size);
+       if (unlikely(protocol_failed(attr)
+                    || attr[IPSET_ATTR_SETNAME] == NULL
+                    || !((attr[IPSET_ATTR_DATA] != NULL) ^ 
+                         (attr[IPSET_ATTR_ADT] != NULL))
+                    || (attr[IPSET_ATTR_DATA] != NULL
+                        && !flag_nested(attr[IPSET_ATTR_DATA]))
+                    || (attr[IPSET_ATTR_ADT] != NULL
+                        && (!flag_nested(attr[IPSET_ATTR_ADT])
+                            || attr[IPSET_ATTR_LINENO] == NULL))))
+               return -IPSET_ERR_PROTOCOL;
+
+       set = find_set(nla_data(attr[IPSET_ATTR_SETNAME]));
+       if (set == NULL)
+               return -EEXIST;
+
+       if (attr[IPSET_ATTR_DATA]) {
+               ret = call_ad(ctnl, skb, attr,
+                             set, attr[IPSET_ATTR_DATA], IPSET_ADD, flags);
+       } else {
+               int nla_rem;
                
-               if (res != 0)
-                       return line;
-               used += ALIGNED(set_restore->header_size);
-
-               index = ip_set_find_byindex(set_restore->index);
-               DP("index %u, restore_index %u", index, set_restore->index);
-               if (index != set_restore->index)
-                       return line;
-               /* Try to restore members data */
-               set = ip_set_list[index];
-               members_size = 0;
-               DP("members_size %lu reqsize %lu",
-                  (unsigned long)set_restore->members_size,
-                  (unsigned long)set->type->reqsize);
-               while (members_size + ALIGNED(set->type->reqsize) <=
-                      set_restore->members_size) {
-                       line++;
-                       DP("members: %d, line %d", members_size, line);
-                       res = ip_set_addip(set,
-                                          data + used + members_size,
-                                          set->type->reqsize);
-                       if (!(res == 0 || res == -EEXIST))
-                               return line;
-                       members_size += ALIGNED(set->type->reqsize);
+               nla_for_each_nested(nla, attr[IPSET_ATTR_ADT], nla_rem) {
+                       if (nla_type(nla) != IPSET_ATTR_DATA
+                           || !flag_nested(nla))
+                               return -IPSET_ERR_PROTOCOL;
+                       ret = call_ad(ctnl, skb, attr,
+                                      set, nla, IPSET_ADD, flags);
+                       if (ret < 0)
+                               return ret;
                }
-
-               DP("members_size %lu  %d",
-                  (unsigned long)set_restore->members_size, members_size);
-               if (members_size != set_restore->members_size)
-                       return line++;
-               used += set_restore->members_size;              
        }
-       
-   finish:
-       if (used != len)
-               return line;
-       
-       return 0;       
+       return ret;
 }
 
 static int
-ip_set_sockfn_set(struct sock *sk, int optval, void *user, unsigned int len)
+ip_set_udel(struct sock *ctnl, struct sk_buff *skb,
+           const struct nlmsghdr *nlh,
+           const struct nlattr * const attr[])
 {
-       void *data;
-       int res = 0;            /* Assume OK */
-       size_t offset;
-       unsigned *op;
-       struct ip_set_req_adt *req_adt;
-       ip_set_id_t index = IP_SET_INVALID_ID;
-       int (*adtfn)(struct ip_set *set,
-                    const void *data, u_int32_t size);
-       struct fn_table {
-               int (*fn)(struct ip_set *set,
-                         const void *data, u_int32_t size);
-       } adtfn_table[] =
-               { { ip_set_addip }, { ip_set_delip }, { ip_set_testip},
-       };
-
-       DP("optval=%d, user=%p, len=%d", optval, user, len);
-       if (!capable(CAP_NET_ADMIN))
-               return -EPERM;
-       if (optval != SO_IP_SET)
-               return -EBADF;
-       if (len <= sizeof(unsigned)) {
-               ip_set_printk("short userdata (want >%zu, got %u)",
-                             sizeof(unsigned), len);
-               return -EINVAL;
-       }
-       data = vmalloc(len);
-       if (!data) {
-               DP("out of mem for %u bytes", len);
-               return -ENOMEM;
-       }
-       if (copy_from_user(data, user, len) != 0) {
-               res = -EFAULT;
-               goto done;
-       }
-       if (down_interruptible(&ip_set_app_mutex)) {
-               res = -EINTR;
-               goto done;
-       }
+       struct ip_set *set;
+       const struct nlattr *nla;
+       uint32_t flags = flag_exist(nlh);
+       int ret = 0;
 
-       op = (unsigned *)data;
-       DP("op=%x", *op);
+       if (unlikely(protocol_failed(attr)
+                    || attr[IPSET_ATTR_SETNAME] == NULL
+                    || !((attr[IPSET_ATTR_DATA] != NULL) ^ 
+                         (attr[IPSET_ATTR_ADT] != NULL))
+                    || (attr[IPSET_ATTR_DATA] != NULL
+                        && !flag_nested(attr[IPSET_ATTR_DATA]))
+                    || (attr[IPSET_ATTR_ADT] != NULL
+                        && (!flag_nested(attr[IPSET_ATTR_ADT])
+                            || attr[IPSET_ATTR_LINENO] == NULL))))
+               return -IPSET_ERR_PROTOCOL;
        
-       if (*op < IP_SET_OP_VERSION) {
-               /* Check the version at the beginning of operations */
-               struct ip_set_req_version *req_version = data;
-               if (!(req_version->version == IP_SET_PROTOCOL_UNALIGNED
-                     || req_version->version == IP_SET_PROTOCOL_VERSION)) {
-                       res = -EPROTO;
-                       goto done;
-               }
-               protocol_version = req_version->version;
-       }
-
-       switch (*op) {
-       case IP_SET_OP_CREATE:{
-               struct ip_set_req_create *req_create = data;
-               offset = ALIGNED(sizeof(struct ip_set_req_create));
-               
-               if (len < offset) {
-                       ip_set_printk("short CREATE data (want >=%zu, got %u)",
-                                     offset, len);
-                       res = -EINVAL;
-                       goto done;
-               }
-               req_create->name[IP_SET_MAXNAMELEN - 1] = '\0';
-               req_create->typename[IP_SET_MAXNAMELEN - 1] = '\0';
-               res = ip_set_create(req_create->name,
-                                   req_create->typename,
-                                   IP_SET_INVALID_ID,
-                                   data + offset,
-                                   len - offset);
-               goto done;
-       }
-       case IP_SET_OP_DESTROY:{
-               struct ip_set_req_std *req_destroy = data;
+       set = find_set(nla_data(attr[IPSET_ATTR_SETNAME]));
+       if (set == NULL)
+               return -EEXIST;
+       
+       if (attr[IPSET_ATTR_DATA]) {
+               ret = call_ad(ctnl, skb, attr,
+                             set, attr[IPSET_ATTR_DATA], IPSET_DEL, flags);
+       } else {
+               int nla_rem;
                
-               if (len != sizeof(struct ip_set_req_std)) {
-                       ip_set_printk("invalid DESTROY data (want %zu, got %u)",
-                                     sizeof(struct ip_set_req_std), len);
-                       res = -EINVAL;
-                       goto done;
-               }
-               if (STREQ(req_destroy->name, IPSET_TOKEN_ALL)) {
-                       /* Destroy all sets */
-                       index = IP_SET_INVALID_ID;
-               } else {
-                       req_destroy->name[IP_SET_MAXNAMELEN - 1] = '\0';
-                       index = ip_set_find_byname(req_destroy->name);
-
-                       if (index == IP_SET_INVALID_ID) {
-                               res = -ENOENT;
-                               goto done;
-                       }
-               }
-                       
-               res = ip_set_destroy(index);
-               goto done;
-       }
-       case IP_SET_OP_FLUSH:{
-               struct ip_set_req_std *req_flush = data;
-
-               if (len != sizeof(struct ip_set_req_std)) {
-                       ip_set_printk("invalid FLUSH data (want %zu, got %u)",
-                                     sizeof(struct ip_set_req_std), len);
-                       res = -EINVAL;
-                       goto done;
+               nla_for_each_nested(nla, attr[IPSET_ATTR_ADT], nla_rem) {
+                       if (nla_type(nla) != IPSET_ATTR_DATA
+                           || !flag_nested(nla))
+                               return -IPSET_ERR_PROTOCOL;
+                       ret = call_ad(ctnl, skb, attr,
+                                      set, nla, IPSET_DEL, flags);
+                       if (ret < 0)
+                               return ret;
                }
-               if (STREQ(req_flush->name, IPSET_TOKEN_ALL)) {
-                       /* Flush all sets */
-                       index = IP_SET_INVALID_ID;
-               } else {
-                       req_flush->name[IP_SET_MAXNAMELEN - 1] = '\0';
-                       index = ip_set_find_byname(req_flush->name);
-
-                       if (index == IP_SET_INVALID_ID) {
-                               res = -ENOENT;
-                               goto done;
-                       }
-               }
-               res = ip_set_flush(index);
-               goto done;
        }
-       case IP_SET_OP_RENAME:{
-               struct ip_set_req_create *req_rename = data;
-
-               if (len != sizeof(struct ip_set_req_create)) {
-                       ip_set_printk("invalid RENAME data (want %zu, got %u)",
-                                     sizeof(struct ip_set_req_create), len);
-                       res = -EINVAL;
-                       goto done;
-               }
+       return ret;
+}
 
-               req_rename->name[IP_SET_MAXNAMELEN - 1] = '\0';
-               req_rename->typename[IP_SET_MAXNAMELEN - 1] = '\0';
-                       
-               index = ip_set_find_byname(req_rename->name);
-               if (index == IP_SET_INVALID_ID) {
-                       res = -ENOENT;
-                       goto done;
-               }
-               res = ip_set_rename(index, req_rename->typename);
-               goto done;
-       }
-       case IP_SET_OP_SWAP:{
-               struct ip_set_req_create *req_swap = data;
-               ip_set_id_t to_index;
-
-               if (len != sizeof(struct ip_set_req_create)) {
-                       ip_set_printk("invalid SWAP data (want %zu, got %u)",
-                                     sizeof(struct ip_set_req_create), len);
-                       res = -EINVAL;
-                       goto done;
-               }
+static int
+ip_set_utest(struct sock *ctnl, struct sk_buff *skb,
+            const struct nlmsghdr *nlh,
+            const struct nlattr * const attr[])
+{
+       struct ip_set *set;
+       int ret = 0;
 
-               req_swap->name[IP_SET_MAXNAMELEN - 1] = '\0';
-               req_swap->typename[IP_SET_MAXNAMELEN - 1] = '\0';
+       if (unlikely(protocol_failed(attr)
+                    || attr[IPSET_ATTR_SETNAME] == NULL
+                    || attr[IPSET_ATTR_DATA] == NULL
+                    || !flag_nested(attr[IPSET_ATTR_DATA])))
+               return -IPSET_ERR_PROTOCOL;
+       
+       set = find_set(nla_data(attr[IPSET_ATTR_SETNAME]));
+       if (set == NULL)
+               return -EEXIST;
+       
+       read_lock_bh(&set->lock);
+       ret = set->variant->uadt(set,
+                                nla_data(attr[IPSET_ATTR_DATA]),
+                                nla_len(attr[IPSET_ATTR_DATA]),
+                                IPSET_TEST, NULL, 0);
+       read_unlock_bh(&set->lock);
+       /* Userspace can't trigger element to be re-added */
+       if (ret == -EAGAIN)
+               ret = 1;
+       
+       return ret < 0 ? ret : ret > 0 ? 0 : -IPSET_ERR_EXIST;
+}
 
-               index = ip_set_find_byname(req_swap->name);
-               if (index == IP_SET_INVALID_ID) {
-                       res = -ENOENT;
-                       goto done;
-               }
-               to_index = ip_set_find_byname(req_swap->typename);
-               if (to_index == IP_SET_INVALID_ID) {
-                       res = -ENOENT;
-                       goto done;
-               }
-               res = ip_set_swap(index, to_index);
-               goto done;
-       }
-       default:
-               break;  /* Set identified by id */
-       }
+/* Get header data of a set */
+
+static int
+ip_set_header(struct sock *ctnl, struct sk_buff *skb,
+             const struct nlmsghdr *nlh,
+             const struct nlattr * const attr[])
+{
+       struct ip_set *set;
+       struct sk_buff *skb2;
+       struct nlmsghdr *nlh2;
+       ip_set_id_t index;
+       int ret = 0;
+
+       if (unlikely(protocol_failed(attr)
+                    || attr[IPSET_ATTR_SETNAME] == NULL))
+               return -IPSET_ERR_PROTOCOL;
        
-       /* There we may have add/del/test/bind/unbind/test_bind operations */
-       if (*op < IP_SET_OP_ADD_IP || *op > IP_SET_OP_TEST_IP) {
-               res = -EBADMSG;
-               goto done;
-       }
-       adtfn = adtfn_table[*op - IP_SET_OP_ADD_IP].fn;
+       index = find_set_id(nla_data(attr[IPSET_ATTR_SETNAME]));
+       if (index == IPSET_INVALID_ID)
+               return -EEXIST;
+       set = ip_set_list[index];
 
-       if (len < ALIGNED(sizeof(struct ip_set_req_adt))) {
-               ip_set_printk("short data in adt request (want >=%zu, got %u)",
-                             ALIGNED(sizeof(struct ip_set_req_adt)), len);
-               res = -EINVAL;
-               goto done;
-       }
-       req_adt = data;
+       skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+       if (skb2 == NULL)
+               return -ENOMEM;
+       
+       nlh2 = start_msg(skb2, NETLINK_CB(skb).pid, nlh->nlmsg_seq, 0,
+                        IPSET_CMD_HEADER);
+       if (!nlh2)
+               goto nlmsg_failure;
+       NLA_PUT_U8(skb2, IPSET_ATTR_PROTOCOL, IPSET_PROTOCOL);
+       NLA_PUT_STRING(skb2, IPSET_ATTR_SETNAME, set->name);
+       NLA_PUT_STRING(skb2, IPSET_ATTR_TYPENAME, set->type->name);
+       NLA_PUT_U8(skb2, IPSET_ATTR_FAMILY, set->type->family);
+       NLA_PUT_U8(skb2, IPSET_ATTR_REVISION, set->type->revision);
+       nlmsg_end(skb2, nlh2);
+
+       ret = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).pid, MSG_DONTWAIT);
+       if (ret < 0)
+               return -EFAULT;
+       
+       return 0;
 
-       index = ip_set_find_byindex(req_adt->index);
-       if (index == IP_SET_INVALID_ID) {
-               res = -ENOENT;
-               goto done;
-       }
-       do {
-               struct ip_set *set = ip_set_list[index];
-               size_t offset = ALIGNED(sizeof(struct ip_set_req_adt));
+nla_put_failure:
+       nlmsg_cancel(skb2, nlh2);
+nlmsg_failure:
+       kfree_skb(skb2);        
+       return -EFAULT;
+}
 
-               IP_SET_ASSERT(set);
+/* Get type data */
 
-               if (len - offset != set->type->reqsize) {
-                       ip_set_printk("data length wrong (want %lu, have %zu)",
-                                     (long unsigned)set->type->reqsize,
-                                     len - offset);
-                       res = -EINVAL;
-                       goto done;
+static const struct nla_policy
+ip_set_type_policy[IPSET_ATTR_CMD_MAX + 1] __read_mostly = {
+       [IPSET_ATTR_PROTOCOL]   = { .type = NLA_U8 },
+       [IPSET_ATTR_TYPENAME]   = { .type = NLA_STRING,
+                                   .len = IPSET_MAXNAMELEN },
+       [IPSET_ATTR_FAMILY]     = { .type = NLA_U8 },
+};
+
+static bool
+find_set_type_minmax(const char *name, uint8_t family,
+                    uint8_t *min, uint8_t *max)
+{
+       struct ip_set_type *type;
+       bool ret = false;
+       /* min/max stay 0 when no type matches; the first match seeds min */
+       *min = *max = 0;
+       ip_set_type_list_lock();
+       list_for_each_entry(type, &ip_set_type_list, list)
+               if (STREQ(type->name, name)
+                   && (type->family == family || type->family == AF_UNSPEC)) {
+                       if (!ret || type->revision < *min)
+                               *min = type->revision;
+                       if (type->revision > *max)
+                               *max = type->revision;
+                       ret = true;
                 }
-               res = adtfn(set, data + offset, len - offset);
-       } while (0);
-
-    done:
-       up(&ip_set_app_mutex);
-       vfree(data);
-       if (res > 0)
-               res = 0;
-       DP("final result %d", res);
-       return res;
+       ip_set_type_list_unlock();
+
+       return ret;
 }
 
 static int
-ip_set_sockfn_get(struct sock *sk, int optval, void *user, int *len)
+ip_set_type(struct sock *ctnl, struct sk_buff *skb,
+           const struct nlmsghdr *nlh,
+           const struct nlattr * const attr[])
 {
-       int res = 0;
-       unsigned *op;
-       ip_set_id_t index = IP_SET_INVALID_ID;
-       void *data;
-       int copylen = *len;
-
-       DP("optval=%d, user=%p, len=%d", optval, user, *len);
-       if (!capable(CAP_NET_ADMIN))
-               return -EPERM;
-       if (optval != SO_IP_SET)
-               return -EBADF;
-       if (*len < sizeof(unsigned)) {
-               ip_set_printk("short userdata (want >=%zu, got %d)",
-                             sizeof(unsigned), *len);
-               return -EINVAL;
-       }
-       data = vmalloc(*len);
-       if (!data) {
-               DP("out of mem for %d bytes", *len);
-               return -ENOMEM;
-       }
-       if (copy_from_user(data, user, *len) != 0) {
-               res = -EFAULT;
-               goto done;
-       }
-       if (down_interruptible(&ip_set_app_mutex)) {
-               res = -EINTR;
-               goto done;
-       }
-
-       op = (unsigned *) data;
-       DP("op=%x", *op);
+       struct sk_buff *skb2;
+       struct nlmsghdr *nlh2;
+       uint8_t family, min, max;
+       const char *typename;
+       int ret = 0;
 
-       if (*op < IP_SET_OP_VERSION) {
-               /* Check the version at the beginning of operations */
-               struct ip_set_req_version *req_version = data;
-               if (!(req_version->version == IP_SET_PROTOCOL_UNALIGNED
-                     || req_version->version == IP_SET_PROTOCOL_VERSION)) {
-                       res = -EPROTO;
-                       goto done;
+       if (unlikely(protocol_failed(attr)
+                    || attr[IPSET_ATTR_TYPENAME] == NULL
+                    || attr[IPSET_ATTR_FAMILY] == NULL))
+               return -IPSET_ERR_PROTOCOL;
+       
+       family = nla_get_u8(attr[IPSET_ATTR_FAMILY]);
+       typename = nla_data(attr[IPSET_ATTR_TYPENAME]);
+       if (!find_set_type_minmax(typename, family, &min, &max)) {
+               /* Try to load in the type module */
+               load_type_module(typename);
+               if (!find_set_type_minmax(typename, family, &min, &max)) {
+                       D("can't find: %s, family: %u", typename, family);
+                       return -EEXIST;
                }
-               protocol_version = req_version->version;
        }
 
-       switch (*op) {
-       case IP_SET_OP_VERSION: {
-               struct ip_set_req_version *req_version = data;
+       skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+       if (skb2 == NULL)
+               return -ENOMEM;
+       
+       nlh2 = start_msg(skb2, NETLINK_CB(skb).pid, nlh->nlmsg_seq, 0,
+                        IPSET_CMD_TYPE);
+       if (!nlh2)
+               goto nlmsg_failure;
+       NLA_PUT_U8(skb2, IPSET_ATTR_PROTOCOL, IPSET_PROTOCOL);
+       NLA_PUT_STRING(skb2, IPSET_ATTR_TYPENAME, typename);
+       NLA_PUT_U8(skb2, IPSET_ATTR_FAMILY, family);
+       NLA_PUT_U8(skb2, IPSET_ATTR_REVISION, max);
+       NLA_PUT_U8(skb2, IPSET_ATTR_REVISION_MIN, min);
+       nlmsg_end(skb2, nlh2);
+
+       D("Send TYPE, nlmsg_len: %u", nlh2->nlmsg_len);
+       ret = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).pid, MSG_DONTWAIT);
+       if (ret < 0)
+               return -EFAULT;
+       
+       return 0;
 
-               if (*len != sizeof(struct ip_set_req_version)) {
-                       ip_set_printk("invalid VERSION (want %zu, got %d)",
-                                     sizeof(struct ip_set_req_version),
-                                     *len);
-                       res = -EINVAL;
-                       goto done;
-               }
+nla_put_failure:
+       nlmsg_cancel(skb2, nlh2);
+nlmsg_failure:
+       kfree_skb(skb2);        
+       return -EFAULT;
+}
 
-               req_version->version = IP_SET_PROTOCOL_VERSION;
-               res = copy_to_user(user, req_version,
-                                  sizeof(struct ip_set_req_version));
-               goto done;
-       }
-       case IP_SET_OP_GET_BYNAME: {
-               struct ip_set_req_get_set *req_get = data;
-
-               if (*len != sizeof(struct ip_set_req_get_set)) {
-                       ip_set_printk("invalid GET_BYNAME (want %zu, got %d)",
-                                     sizeof(struct ip_set_req_get_set), *len);
-                       res = -EINVAL;
-                       goto done;
-               }
-               req_get->set.name[IP_SET_MAXNAMELEN - 1] = '\0';
-               index = ip_set_find_byname(req_get->set.name);
-               req_get->set.index = index;
-               goto copy;
-       }
-       case IP_SET_OP_GET_BYINDEX: {
-               struct ip_set_req_get_set *req_get = data;
-
-               if (*len != sizeof(struct ip_set_req_get_set)) {
-                       ip_set_printk("invalid GET_BYINDEX (want %zu, got %d)",
-                                     sizeof(struct ip_set_req_get_set), *len);
-                       res = -EINVAL;
-                       goto done;
-               }
-               req_get->set.name[IP_SET_MAXNAMELEN - 1] = '\0';
-               index = ip_set_find_byindex(req_get->set.index);
-               strncpy(req_get->set.name,
-                       index == IP_SET_INVALID_ID ? ""
-                       : ip_set_list[index]->name, IP_SET_MAXNAMELEN);
-               goto copy;
-       }
-       case IP_SET_OP_ADT_GET: {
-               struct ip_set_req_adt_get *req_get = data;
-
-               if (*len != sizeof(struct ip_set_req_adt_get)) {
-                       ip_set_printk("invalid ADT_GET (want %zu, got %d)",
-                                     sizeof(struct ip_set_req_adt_get), *len);
-                       res = -EINVAL;
-                       goto done;
-               }
-               req_get->set.name[IP_SET_MAXNAMELEN - 1] = '\0';
-               index = ip_set_find_byname(req_get->set.name);
-               if (index != IP_SET_INVALID_ID) {
-                       req_get->set.index = index;
-                       strncpy(req_get->typename,
-                               ip_set_list[index]->type->typename,
-                               IP_SET_MAXNAMELEN - 1);
-               } else {
-                       res = -ENOENT;
-                       goto done;
-               }
-               goto copy;
-       }
-       case IP_SET_OP_MAX_SETS: {
-               struct ip_set_req_max_sets *req_max_sets = data;
-               ip_set_id_t i;
-
-               if (*len != sizeof(struct ip_set_req_max_sets)) {
-                       ip_set_printk("invalid MAX_SETS (want %zu, got %d)",
-                                     sizeof(struct ip_set_req_max_sets), *len);
-                       res = -EINVAL;
-                       goto done;
-               }
+/* Get protocol version */
 
-               if (STREQ(req_max_sets->set.name, IPSET_TOKEN_ALL)) {
-                       req_max_sets->set.index = IP_SET_INVALID_ID;
-               } else {
-                       req_max_sets->set.name[IP_SET_MAXNAMELEN - 1] = '\0';
-                       req_max_sets->set.index =
-                               ip_set_find_byname(req_max_sets->set.name);
-                       if (req_max_sets->set.index == IP_SET_INVALID_ID) {
-                               res = -ENOENT;
-                               goto done;
-                       }
-               }
-               req_max_sets->max_sets = ip_set_max;
-               req_max_sets->sets = 0;
-               for (i = 0; i < ip_set_max; i++) {
-                       if (ip_set_list[i] != NULL)
-                               req_max_sets->sets++;
-               }
-               goto copy;
-       }
-       case IP_SET_OP_LIST_SIZE:
-       case IP_SET_OP_SAVE_SIZE: {
-               struct ip_set_req_setnames *req_setnames = data;
-               struct ip_set_name_list *name_list;
-               struct ip_set *set;
-               ip_set_id_t i;
-               int used;
-
-               if (*len < ALIGNED(sizeof(struct ip_set_req_setnames))) {
-                       ip_set_printk("short LIST_SIZE (want >=%zu, got %d)",
-                                     ALIGNED(sizeof(struct ip_set_req_setnames)),
-                                     *len);
-                       res = -EINVAL;
-                       goto done;
-               }
+static const struct nla_policy
+ip_set_protocol_policy[IPSET_ATTR_CMD_MAX + 1] __read_mostly = {
+       [IPSET_ATTR_PROTOCOL]   = { .type = NLA_U8 },
+};
 
-               req_setnames->size = 0;
-               used = ALIGNED(sizeof(struct ip_set_req_setnames));
-               for (i = 0; i < ip_set_max; i++) {
-                       if (ip_set_list[i] == NULL)
-                               continue;
-                       name_list = data + used;
-                       used += ALIGNED(sizeof(struct ip_set_name_list));
-                       if (used > copylen) {
-                               res = -EAGAIN;
-                               goto done;
-                       }
-                       set = ip_set_list[i];
-                       /* Fill in index, name, etc. */
-                       name_list->index = i;
-                       name_list->id = set->id;
-                       strncpy(name_list->name,
-                               set->name,
-                               IP_SET_MAXNAMELEN - 1);
-                       strncpy(name_list->typename,
-                               set->type->typename,
-                               IP_SET_MAXNAMELEN - 1);
-                       DP("filled %s of type %s, index %u\n",
-                          name_list->name, name_list->typename,
-                          name_list->index);
-                       if (!(req_setnames->index == IP_SET_INVALID_ID
-                             || req_setnames->index == i))
-                             continue;
-                       /* Update size */
-                       req_setnames->size +=
-                               (*op == IP_SET_OP_LIST_SIZE ?
-                                       ALIGNED(sizeof(struct ip_set_list)) :
-                                       ALIGNED(sizeof(struct ip_set_save)))
-                               + ALIGNED(set->type->header_size)
-                               + set->type->list_members_size(set, DONT_ALIGN);
-               }
-               if (copylen != used) {
-                       res = -EAGAIN;
-                       goto done;
-               }
-               goto copy;
-       }
-       case IP_SET_OP_LIST: {
-               struct ip_set_req_list *req_list = data;
-               ip_set_id_t i;
-               int used;
-
-               if (*len < sizeof(struct ip_set_req_list)) {
-                       ip_set_printk("short LIST (want >=%zu, got %d)",
-                                     sizeof(struct ip_set_req_list), *len);
-                       res = -EINVAL;
-                       goto done;
-               }
-               index = req_list->index;
-               if (index != IP_SET_INVALID_ID
-                   && ip_set_find_byindex(index) != index) {
-                       res = -ENOENT;
-                       goto done;
-               }
-               used = 0;
-               if (index == IP_SET_INVALID_ID) {
-                       /* List all sets */
-                       for (i = 0; i < ip_set_max && res == 0; i++) {
-                               if (ip_set_list[i] != NULL)
-                                       res = ip_set_list_set(i, data, &used, *len);
-                       }
-               } else {
-                       /* List an individual set */
-                       res = ip_set_list_set(index, data, &used, *len);
-               }
-               if (res != 0)
-                       goto done;
-               else if (copylen != used) {
-                       res = -EAGAIN;
-                       goto done;
-               }
-               goto copy;
-       }
-       case IP_SET_OP_SAVE: {
-               struct ip_set_req_list *req_save = data;
-               ip_set_id_t i;
-               int used;
-
-               if (*len < sizeof(struct ip_set_req_list)) {
-                       ip_set_printk("short SAVE (want >=%zu, got %d)",
-                                     sizeof(struct ip_set_req_list), *len);
-                       res = -EINVAL;
-                       goto done;
-               }
-               index = req_save->index;
-               if (index != IP_SET_INVALID_ID
-                   && ip_set_find_byindex(index) != index) {
-                       res = -ENOENT;
-                       goto done;
-               }
+static int
+ip_set_protocol(struct sock *ctnl, struct sk_buff *skb,
+               const struct nlmsghdr *nlh,
+               const struct nlattr * const attr[])
+{
+       struct sk_buff *skb2;
+       struct nlmsghdr *nlh2;
+       int ret = 0;
 
-#define SETLIST(set)   (strcmp(set->type->typename, "setlist") == 0)
-
-               used = 0;
-               if (index == IP_SET_INVALID_ID) {
-                       /* Save all sets: ugly setlist type dependency */
-                       int setlist = 0;
-               setlists:
-                       for (i = 0; i < ip_set_max && res == 0; i++) {
-                               if (ip_set_list[i] != NULL
-                                   && !(setlist ^ SETLIST(ip_set_list[i])))
-                                       res = ip_set_save_set(i, data, &used, *len);
-                       }
-                       if (!setlist) {
-                               setlist = 1;
-                               goto setlists;
-                       }
-               } else {
-                       /* Save an individual set */
-                       res = ip_set_save_set(index, data, &used, *len);
-               }
-               if (res == 0)
-                       res = ip_set_save_marker(data, &used, *len);
-                       
-               if (res != 0)
-                       goto done;
-               else if (copylen != used) {
-                       res = -EAGAIN;
-                       goto done;
-               }
-               goto copy;
-       }
-       case IP_SET_OP_RESTORE: {
-               struct ip_set_req_setnames *req_restore = data;
-               size_t offset = ALIGNED(sizeof(struct ip_set_req_setnames));
-               int line;
-
-               if (*len < offset || *len != req_restore->size) {
-                       ip_set_printk("invalid RESTORE (want =%lu, got %d)",
-                                     (long unsigned)req_restore->size, *len);
-                       res = -EINVAL;
-                       goto done;
-               }
-               line = ip_set_restore(data + offset, req_restore->size - offset);
-               DP("ip_set_restore: %d", line);
-               if (line != 0) {
-                       res = -EAGAIN;
-                       req_restore->size = line;
-                       copylen = sizeof(struct ip_set_req_setnames);
-                       goto copy;
-               }
-               goto done;
-       }
-       default:
-               res = -EBADMSG;
-               goto done;
-       }       /* end of switch(op) */
-
-    copy:
-       DP("set %s, copylen %d", index != IP_SET_INVALID_ID
-                                && ip_set_list[index]
-                    ? ip_set_list[index]->name
-                    : ":all:", copylen);
-       res = copy_to_user(user, data, copylen);
-       
-    done:
-       up(&ip_set_app_mutex);
-       vfree(data);
-       if (res > 0)
-               res = 0;
-       DP("final result %d", res);
-       return res;
+       if (unlikely(attr[IPSET_ATTR_PROTOCOL] == NULL))
+               return -IPSET_ERR_PROTOCOL;
+       
+       skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+       if (skb2 == NULL)
+               return -ENOMEM;
+       
+       nlh2 = start_msg(skb2, NETLINK_CB(skb).pid, nlh->nlmsg_seq, 0,
+                        IPSET_CMD_PROTOCOL);
+       if (!nlh2)
+               goto nlmsg_failure;
+       NLA_PUT_U8(skb2, IPSET_ATTR_PROTOCOL, IPSET_PROTOCOL);
+       nlmsg_end(skb2, nlh2);
+
+       ret = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).pid, MSG_DONTWAIT);
+       if (ret < 0)
+               return -EFAULT;
+       
+       return 0;
+
+nla_put_failure:
+       nlmsg_cancel(skb2, nlh2);
+nlmsg_failure:
+       kfree_skb(skb2);        
+       return -EFAULT;
 }
 
-static struct nf_sockopt_ops so_set = {
-       .pf             = PF_INET,
-       .set_optmin     = SO_IP_SET,
-       .set_optmax     = SO_IP_SET + 1,
-       .set            = &ip_set_sockfn_set,
-       .get_optmin     = SO_IP_SET,
-       .get_optmax     = SO_IP_SET + 1,
-       .get            = &ip_set_sockfn_get,
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,23)
-       .use            = 0,
-#else
-       .owner          = THIS_MODULE,
-#endif
+static const struct nfnl_callback ip_set_netlink_subsys_cb[IPSET_MSG_MAX] = {
+       [IPSET_CMD_CREATE]      = {
+               .call           = ip_set_create,
+               .attr_count     = IPSET_ATTR_CMD_MAX,
+               .policy         = ip_set_create_policy,
+       },
+       [IPSET_CMD_DESTROY]     = {
+               .call           = ip_set_destroy,
+               .attr_count     = IPSET_ATTR_CMD_MAX,
+               .policy         = ip_set_setname_policy,
+       },
+       [IPSET_CMD_FLUSH]       = {
+               .call           = ip_set_flush,
+               .attr_count     = IPSET_ATTR_CMD_MAX,
+               .policy         = ip_set_setname_policy,
+       },
+       [IPSET_CMD_RENAME]      = {
+               .call           = ip_set_rename,
+               .attr_count     = IPSET_ATTR_CMD_MAX,
+               .policy         = ip_set_setname2_policy,
+       },
+       [IPSET_CMD_SWAP]        = {
+               .call           = ip_set_swap,
+               .attr_count     = IPSET_ATTR_CMD_MAX,
+               .policy         = ip_set_setname2_policy,
+       },
+       [IPSET_CMD_LIST]        = {
+               .call           = ip_set_dump,
+               .attr_count     = IPSET_ATTR_CMD_MAX,
+               .policy         = ip_set_setname_policy,
+       },
+       [IPSET_CMD_SAVE]        = {
+               .call           = ip_set_dump,
+               .attr_count     = IPSET_ATTR_CMD_MAX,
+               .policy         = ip_set_setname_policy,
+       },
+       [IPSET_CMD_ADD] = {
+               .call           = ip_set_uadd,
+               .attr_count     = IPSET_ATTR_CMD_MAX,
+               .policy         = ip_set_adt_policy,
+       },
+       [IPSET_CMD_DEL] = {
+               .call           = ip_set_udel,
+               .attr_count     = IPSET_ATTR_CMD_MAX,
+               .policy         = ip_set_adt_policy,
+       },
+       [IPSET_CMD_TEST]        = {
+               .call           = ip_set_utest,
+               .attr_count     = IPSET_ATTR_CMD_MAX,
+               .policy         = ip_set_adt_policy,
+       },
+       [IPSET_CMD_HEADER]      = {
+               .call           = ip_set_header,
+               .attr_count     = IPSET_ATTR_CMD_MAX,
+               .policy         = ip_set_setname_policy,
+       },
+       [IPSET_CMD_TYPE]        = {
+               .call           = ip_set_type,
+               .attr_count     = IPSET_ATTR_CMD_MAX,
+               .policy         = ip_set_type_policy,
+       },
+       [IPSET_CMD_PROTOCOL]    = {
+               .call           = ip_set_protocol,
+               .attr_count     = IPSET_ATTR_CMD_MAX,
+               .policy         = ip_set_protocol_policy,
+       },
 };
 
-static int max_sets;
-
-module_param(max_sets, int, 0600);
-MODULE_PARM_DESC(max_sets, "maximal number of sets");
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
-MODULE_DESCRIPTION("module implementing core IP set support");
+static struct nfnetlink_subsystem ip_set_netlink_subsys = {
+       .name           = "ip_set",
+       .subsys_id      = NFNL_SUBSYS_IPSET,
+       .cb_count       = IPSET_MSG_MAX,
+       .cb             = ip_set_netlink_subsys_cb,
+};
 
 static int __init
 ip_set_init(void)
 {
-       int res;
-
-       /* For the -rt branch, DECLARE_MUTEX/init_MUTEX avoided */
-       sema_init(&ip_set_app_mutex, 1);
+       int ret;
 
        if (max_sets)
                ip_set_max = max_sets;
-       if (ip_set_max >= IP_SET_INVALID_ID)
-               ip_set_max = IP_SET_INVALID_ID - 1;
+       if (ip_set_max >= IPSET_INVALID_ID)
+               ip_set_max = IPSET_INVALID_ID - 1;
 
-       ip_set_list = vmalloc(sizeof(struct ip_set *) * ip_set_max);
+       ip_set_list = kzalloc(sizeof(struct ip_set *) * ip_set_max, GFP_KERNEL);
        if (!ip_set_list) {
                printk(KERN_ERR "Unable to create ip_set_list\n");
                return -ENOMEM;
        }
-       memset(ip_set_list, 0, sizeof(struct ip_set *) * ip_set_max);
 
-       INIT_LIST_HEAD(&set_type_list);
+       INIT_LIST_HEAD(&ip_set_type_list);
 
-       res = nf_register_sockopt(&so_set);
-       if (res != 0) {
-               ip_set_printk("SO_SET registry failed: %d", res);
-               vfree(ip_set_list);
-               return res;
+       ret = nfnetlink_subsys_register(&ip_set_netlink_subsys);
+       if (ret != 0) {
+               printk("ip_set_init: cannot register with nfnetlink.\n");
+               kfree(ip_set_list);
+               return ret;
        }
 
-       printk("ip_set version %u loaded\n", IP_SET_PROTOCOL_VERSION);  
+       printk("ip_set with protocol version %u loaded\n", IPSET_PROTOCOL);     
        return 0;
 }
 
 static void __exit
 ip_set_fini(void)
 {
-       /* There can't be any existing set or binding */
-       nf_unregister_sockopt(&so_set);
-       vfree(ip_set_list);
-       DP("these are the famous last words");
+       /* There can't be any existing set */
+       nfnetlink_subsys_unregister(&ip_set_netlink_subsys);
+       kfree(ip_set_list);
+       D("these are the famous last words");
 }
 
-EXPORT_SYMBOL(ip_set_register_set_type);
-EXPORT_SYMBOL(ip_set_unregister_set_type);
+EXPORT_SYMBOL(ip_set_type_register);
+EXPORT_SYMBOL(ip_set_type_unregister);
 
 EXPORT_SYMBOL(ip_set_get_byname);
-EXPORT_SYMBOL(ip_set_get_byindex);
 EXPORT_SYMBOL(ip_set_put_byindex);
-EXPORT_SYMBOL(ip_set_id);
-EXPORT_SYMBOL(__ip_set_get_byname);
-EXPORT_SYMBOL(__ip_set_put_byindex);
 
-EXPORT_SYMBOL(ip_set_addip_kernel);
-EXPORT_SYMBOL(ip_set_delip_kernel);
-EXPORT_SYMBOL(ip_set_testip_kernel);
+EXPORT_SYMBOL(ip_set_add);
+EXPORT_SYMBOL(ip_set_del);
+EXPORT_SYMBOL(ip_set_test);
 
 module_init(ip_set_init);
 module_exit(ip_set_fini);
index be3c5385d9d76411cbe8165c283187cc8a63add2..ccb5473a05d7ab766346da9c57b004e4f3df0b5f 100644 (file)
@@ -1,13 +1,13 @@
 /* Copyright (C) 2000-2002 Joakim Axelsson <gozem@linux.nu>
  *                         Patrick Schaaf <bof@bof.de>
- * Copyright (C) 2003-2008 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
+ * Copyright (C) 2003-2010 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
  * published by the Free Software Foundation.
  */
 
-/* Kernel module implementing an IP set type: the single bitmap type */
+/* Kernel module implementing an IP set type: the bitmap:ip type */
 
 #include <linux/module.h>
 #include <linux/ip.h>
 #include <asm/uaccess.h>
 #include <asm/bitops.h>
 #include <linux/spinlock.h>
+#include <linux/netlink.h>
+#include <linux/delay.h>
+#include <linux/jiffies.h>
+#include <linux/timer.h>
+#include <net/netlink.h>
+#include <net/pfxlen.h>
+#include <net/tcp.h>
 
-#include <linux/netfilter_ipv4/ip_set_ipmap.h>
+#include <linux/netfilter/ip_set.h>
+#include <linux/netfilter/ip_set_bitmap.h>
+#define IP_SET_BITMAP_TIMEOUT
+#include <linux/netfilter/ip_set_timeout.h>
 
-static inline ip_set_ip_t
-ip_to_id(const struct ip_set_ipmap *map, ip_set_ip_t ip)
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
+MODULE_DESCRIPTION("bitmap:ip type of IP sets");
+MODULE_ALIAS("ip_set_bitmap:ip");
+
+/* Base variant */
+
+/* Per-set data of the plain (no timeout) bitmap:ip variant.
+ * struct bitmap_ip_timeout starts with the same fields in the same order
+ * and is cast to this type in this file, so the layouts must stay in sync. */
+struct bitmap_ip {
+       void *members;          /* the set members (used as a bit array) */
+       uint32_t first_ip;      /* host byte order, included in range */
+       uint32_t last_ip;       /* host byte order, included in range */
+       uint32_t elements;      /* maximum number of elements in the set */
+       uint32_t hosts;         /* number of hosts in a subnet */
+       size_t memsize;         /* size of members in bytes */
+       uint8_t netmask;        /* subnet netmask (prefix length) */
+};
+
+/* Convert a host byte order address to its element index: mask the
+ * address with HOSTMASK(netmask), subtract first_ip and divide by the
+ * number of hosts per element. No range check is done here; the callers
+ * in this file check ip against first_ip/last_ip beforehand. */
+static inline uint32_t
+ip_to_id(const struct bitmap_ip *map, uint32_t ip)
 {
-       return ((ip & map->netmask) - map->first_ip)/map->hosts;
+       return ((ip & HOSTMASK(map->netmask)) - map->first_ip)/map->hosts;
 }
 
+/* Return 1 if bit id is set in the members bitmap, 0 otherwise.
+ * id is an element index (see ip_to_id()), not an address. */
 static inline int
-ipmap_test(const struct ip_set *set, ip_set_ip_t ip)
+bitmap_ip_test(const struct bitmap_ip *map, uint32_t id)
 {
-       const struct ip_set_ipmap *map = set->data;
-       
-       if (ip < map->first_ip || ip > map->last_ip)
-               return -ERANGE;
-
-       DP("set: %s, ip:%u.%u.%u.%u", set->name, HIPQUAD(ip));
-       return !!test_bit(ip_to_id(map, ip), map->members);
+       return !!test_bit(id, map->members);
 }
 
-#define KADT_CONDITION
+/* Set bit id in the members bitmap.
+ * Returns -IPSET_ERR_EXIST if the bit was already set, 0 otherwise. */
+static inline int
+bitmap_ip_add(struct bitmap_ip *map, uint32_t id)
+{
+       if (test_and_set_bit(id, map->members))
+               return -IPSET_ERR_EXIST;
 
-UADT(ipmap, test)
-KADT(ipmap, test, ipaddr)
+       return 0;
+}
 
+/* Clear bit id in the members bitmap.
+ * Returns -IPSET_ERR_EXIST if the bit was not set, 0 otherwise. */
 static inline int
-ipmap_add(struct ip_set *set, ip_set_ip_t ip)
+bitmap_ip_del(struct bitmap_ip *map, uint32_t id)
+{
+       if (!test_and_clear_bit(id, map->members))
+               return -IPSET_ERR_EXIST;
+
+       return 0;
+}
+
+/* Kernel-side test/add/del of the IPv4 address taken from a packet.
+ *
+ * Returns -EINVAL for a family other than AF_INET or an unknown adt,
+ * -IPSET_ERR_BITMAP_RANGE if the address is outside first_ip..last_ip,
+ * otherwise the result of bitmap_ip_test/add/del: 0 or 1 for test,
+ * 0 or -IPSET_ERR_EXIST for add and del. */
+static int
+bitmap_ip_kadt(struct ip_set *set, const struct sk_buff *skb,
+              enum ipset_adt adt, uint8_t pf, const uint8_t *flags)
 {
-       struct ip_set_ipmap *map = set->data;
+       struct bitmap_ip *map = set->data;
+       uint32_t ip;
+
+       /* Check the family first so that a non-IPv4 packet is never
+        * interpreted as IPv4 */
+       if (pf != AF_INET)
+               return -EINVAL;
+
+       ip = ntohl(ip4addr(skb, flags));
 
        if (ip < map->first_ip || ip > map->last_ip)
-               return -ERANGE;
+               return -IPSET_ERR_BITMAP_RANGE;
 
-       DP("set: %s, ip:%u.%u.%u.%u", set->name, HIPQUAD(ip));
-       if (test_and_set_bit(ip_to_id(map, ip), map->members))
-               return -EEXIST;
+       /* From here on ip holds the element index, not the address */
+       ip = ip_to_id(map, ip);
 
-       return 0;
+       switch (adt) {
+       case IPSET_TEST:
+               return bitmap_ip_test(map, ip);
+       case IPSET_ADD:
+               return bitmap_ip_add(map, ip);
+       case IPSET_DEL:
+               return bitmap_ip_del(map, ip);
+       default:
+               return -EINVAL;
+       }
 }
 
-UADT(ipmap, add)
-KADT(ipmap, add, ipaddr)
+/* Netlink attribute policy passed to nla_parse() by the add/del/test
+ * (uadt) handlers of both variants. The array has IPSET_ATTR_ADT_MAX+1
+ * entries, one for each attribute type 0..IPSET_ATTR_ADT_MAX. */
+static const struct nla_policy
+bitmap_ip_adt_policy[IPSET_ATTR_ADT_MAX+1] __read_mostly = {
+       [IPSET_ATTR_IP]         = { .type = NLA_U32 },
+       [IPSET_ATTR_IP_TO]      = { .type = NLA_U32 },
+       [IPSET_ATTR_CIDR]       = { .type = NLA_U8 },
+       [IPSET_ATTR_TIMEOUT]    = { .type = NLA_U32 },
+};
 
-static inline int
-ipmap_del(struct ip_set *set, ip_set_ip_t ip)
+/* Userspace (netlink) test/add/del for the plain variant.
+ *
+ * IPSET_ATTR_IP is mandatory. For add and del an optional IPSET_ATTR_IP_TO
+ * or IPSET_ATTR_CIDR extends the request to a range, which is walked in
+ * steps of map->hosts. A test request only looks at IPSET_ATTR_IP.
+ *
+ * Returns 0 (or 0/1 for test) on success, or a negative error:
+ * -IPSET_ERR_PROTOCOL on a parse failure or missing IP,
+ * -IPSET_ERR_BITMAP_RANGE if the address or range leaves the set,
+ * -IPSET_ERR_TIMEOUT if a timeout is given (this variant has none),
+ * -IPSET_ERR_INVALID_CIDR for a prefix longer than 32, or the error of
+ * the first failing add/del. -IPSET_ERR_EXIST does not stop the walk
+ * when IPSET_FLAG_EXIST is set in flags. On a failing element *lineno
+ * is set from IPSET_ATTR_LINENO when that attribute is present. */
+static int
+bitmap_ip_uadt(struct ip_set *set, struct nlattr *head, int len,
+              enum ipset_adt adt, uint32_t *lineno, uint32_t flags)
 {
-       struct ip_set_ipmap *map = set->data;
+       struct bitmap_ip *map = set->data;
+       /* nla_parse() fills tb[0..maxtype], so maxtype+1 slots are needed */
+       struct nlattr *tb[IPSET_ATTR_ADT_MAX+1];
+       bool eexist = flags & IPSET_FLAG_EXIST;
+       uint32_t ip, ip_to, id;
+       int ret = 0;
+
+       if (nla_parse(tb, IPSET_ATTR_ADT_MAX, head, len,
+                     bitmap_ip_adt_policy))
+               return -IPSET_ERR_PROTOCOL;
+
+       if (tb[IPSET_ATTR_IP])
+               ip = ip_set_get_h32(tb[IPSET_ATTR_IP]);
+       else
+               return -IPSET_ERR_PROTOCOL;
 
        if (ip < map->first_ip || ip > map->last_ip)
-               return -ERANGE;
+               return -IPSET_ERR_BITMAP_RANGE;
+
+       if (tb[IPSET_ATTR_TIMEOUT])
+               return -IPSET_ERR_TIMEOUT;
+
+       if (adt == IPSET_TEST)
+               return bitmap_ip_test(map, ip_to_id(map, ip));
+
+       if (tb[IPSET_ATTR_IP_TO]) {
+               ip_to = ip_set_get_h32(tb[IPSET_ATTR_IP_TO]);
+               if (ip > ip_to) {
+                       swap(ip, ip_to);
+                       /* The new upper end was range-checked above;
+                        * only the new lower end is still unchecked */
+                       if (ip < map->first_ip)
+                               return -IPSET_ERR_BITMAP_RANGE;
+               }
+       } else if (tb[IPSET_ATTR_CIDR]) {
+               uint8_t cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]);
+               
+               if (cidr > 32)
+                       return -IPSET_ERR_INVALID_CIDR;
+               ip_to = ip | ~HOSTMASK(cidr);
+       } else
+               ip_to = ip;
+
+       if (ip_to > map->last_ip)
+               return -IPSET_ERR_BITMAP_RANGE;
+
+       for (; !before(ip_to, ip); ip += map->hosts) {
+               id = ip_to_id(map, ip);
+               ret = adt == IPSET_ADD ? bitmap_ip_add(map, id)
+                                      : bitmap_ip_del(map, id);
+
+               if (ret && !(ret == -IPSET_ERR_EXIST && eexist)) {
+                       if (tb[IPSET_ATTR_LINENO])
+                               *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
+                       return ret;
+               }
+       }
+       return ret;
+}
+
+/* Release the members array and the map itself, then clear set->data so
+ * the set no longer points at freed memory. */
+static void
+bitmap_ip_destroy(struct ip_set *set)
+{
+       struct bitmap_ip *map = set->data;
+       
+       ip_set_free(map->members, set->flags);
+       kfree(map);
+       
+       set->data = NULL;
+}
 
-       DP("set: %s, ip:%u.%u.%u.%u", set->name, HIPQUAD(ip));
-       if (!test_and_clear_bit(ip_to_id(map, ip), map->members))
-               return -EEXIST;
+/* Remove every element by zeroing all memsize bytes of the bitmap. */
+static void
+bitmap_ip_flush(struct ip_set *set)
+{
+       struct bitmap_ip *map = set->data;
+       
+       memset(map->members, 0, map->memsize);
+}
+
+/* Write the set header into skb as one nested IPSET_ATTR_DATA attribute:
+ * first and last IP, the netmask (only when it is not 32), the number of
+ * set bits, the reference count minus one and the members size.
+ *
+ * Returns 0 on success and -EFAULT via nla_put_failure, which is reached
+ * when the nest cannot be started and presumably also from the NLA_PUT_*
+ * macros when the skb runs out of room (macro bodies are not visible
+ * here). The nest is not cancelled on that path. */
+static int
+bitmap_ip_head(struct ip_set *set, struct sk_buff *skb)
+{
+       const struct bitmap_ip *map = set->data;
+       struct nlattr *nested;
+       uint32_t id, elements;
+
+       /* Count the members by scanning the whole bitmap */
+       for (id = 0, elements = 0; id < map->elements; id++)
+               if (bitmap_ip_test(map, id)) 
+                       elements++;
+
+       nested = ipset_nest_start(skb, IPSET_ATTR_DATA);
+       if (!nested)
+               goto nla_put_failure;
+       NLA_PUT_NET32(skb, IPSET_ATTR_IP, htonl(map->first_ip));
+       NLA_PUT_NET32(skb, IPSET_ATTR_IP_TO, htonl(map->last_ip));
+       if (map->netmask != 32)
+               NLA_PUT_U8(skb, IPSET_ATTR_NETMASK, map->netmask);
+       NLA_PUT_NET32(skb, IPSET_ATTR_ELEMENTS, htonl(elements));
+       NLA_PUT_NET32(skb, IPSET_ATTR_REFERENCES,
+                     htonl(atomic_read(&set->ref) - 1));
+       NLA_PUT_NET32(skb, IPSET_ATTR_MEMSIZE, htonl(map->memsize));
+       ipset_nest_end(skb, nested);
        
        return 0;
+nla_put_failure:
+       return -EFAULT;
 }
 
-UADT(ipmap, del)
-KADT(ipmap, del, ipaddr)
+/* Dump the set members into skb inside one IPSET_ATTR_ADT nest, one
+ * IPSET_ATTR_DATA nest per member (its IP, plus the netmask as
+ * IPSET_ATTR_CIDR when it is not 32).
+ *
+ * cb->args[2] is the element index to continue from. It is reset to 0
+ * once the whole bitmap has been walked. When an element does not fit,
+ * the partial element is cancelled and 0 is returned with cb->args[2]
+ * left at that element, presumably so the next dump call resumes there.
+ * -EFAULT is returned if the outer nest cannot be started, or if the
+ * very first element of this call cannot start its nest. */
+static int
+bitmap_ip_list(struct ip_set *set,
+              struct sk_buff *skb, struct netlink_callback *cb)
+{
+       const struct bitmap_ip *map = set->data;
+       struct nlattr *atd, *nested;
+       uint32_t id, first = cb->args[2];
 
-static inline int
-__ipmap_create(const struct ip_set_req_ipmap_create *req,
-              struct ip_set_ipmap *map)
+       atd = ipset_nest_start(skb, IPSET_ATTR_ADT);
+       if (!atd)
+               return -EFAULT;
+       for (; cb->args[2] < map->elements; cb->args[2]++) {
+               id = cb->args[2];
+               if (!bitmap_ip_test(map, id)) 
+                       continue;
+               nested = ipset_nest_start(skb, IPSET_ATTR_DATA);
+               if (!nested) {
+                       /* Nothing was written by this call: give up */
+                       if (id == first) {
+                               nla_nest_cancel(skb, atd);
+                               return -EFAULT;
+                       } else
+                               goto nla_put_failure;
+               }
+               /* Rebuild the address from the element index */
+               NLA_PUT_NET32(skb, IPSET_ATTR_IP,
+                             htonl(map->first_ip + id * map->hosts));
+               if (map->netmask != 32)
+                       NLA_PUT_U8(skb, IPSET_ATTR_CIDR, map->netmask);
+               ipset_nest_end(skb, nested);
+       }
+       ipset_nest_end(skb, atd);
+       /* Set listing finished */
+       cb->args[2] = 0;
+       return 0;
+
+nla_put_failure:
+       /* Drop the partial element but keep what was already written */
+       nla_nest_cancel(skb, nested);
+       ipset_nest_end(skb, atd);
+       return 0;
+}
+
+/* Operation table of the plain variant; bitmap_ip_create() installs it
+ * as set->variant when no timeout attribute is given. */
+static const struct ip_set_type_variant bitmap_ip __read_mostly = {
+       .kadt   = bitmap_ip_kadt,
+       .uadt   = bitmap_ip_uadt,
+       .destroy = bitmap_ip_destroy,
+       .flush  = bitmap_ip_flush,
+       .head   = bitmap_ip_head,
+       .list   = bitmap_ip_list,
+};
+
+/* Timeout variant */
+
+/* Per-set data of the timeout variant. The leading fields repeat
+ * struct bitmap_ip in the same order because this structure is cast to
+ * struct bitmap_ip in this file. Here members is used as an array of
+ * unsigned long, one slot per element. */
+struct bitmap_ip_timeout {
+       void *members;          /* the set members */
+       uint32_t first_ip;      /* host byte order, included in range */
+       uint32_t last_ip;       /* host byte order, included in range */
+       uint32_t elements;      /* maximum number of elements in the set */
+       uint32_t hosts;         /* number of hosts in a subnet */
+       size_t memsize;         /* size of members in bytes */
+       uint8_t netmask;        /* subnet netmask (prefix length) */
+
+       uint32_t timeout;       /* default timeout, used when none is given */
+       struct timer_list gc;   /* garbage collection */
+};
+
+/* Return what ip_set_timeout_test() reports for slot id, presumably true
+ * while the slot is set and not yet expired (helper not visible here). */
+static inline bool
+bitmap_ip_timeout_test(const struct bitmap_ip_timeout *map, uint32_t id)
 {
-       map->netmask = req->netmask;
+       unsigned long *table = map->members;
 
-       if (req->netmask == 0xFFFFFFFF) {
-               map->hosts = 1;
-               map->sizeid = map->last_ip - map->first_ip + 1;
-       } else {
-               unsigned int mask_bits, netmask_bits;
-               ip_set_ip_t mask;
+       return ip_set_timeout_test(table[id]);
+}
+
+/* Store a fresh timeout value in slot id.
+ * Returns -IPSET_ERR_EXIST and leaves the slot alone when
+ * bitmap_ip_timeout_test() reports it as occupied, 0 once it is written. */
+static int
+bitmap_ip_timeout_add(struct bitmap_ip_timeout *map,
+                     uint32_t id, uint32_t timeout)
+{
+       unsigned long *slots = map->members;
+
+       if (!bitmap_ip_timeout_test(map, id)) {
+               slots[id] = ip_set_timeout_set(timeout);
+               return 0;
+       }
+
+       return -IPSET_ERR_EXIST;
+}
+
+/* Mark slot id as unset. The slot is overwritten unconditionally.
+ * Returns 0 if bitmap_ip_timeout_test() reported the slot as occupied
+ * beforehand, -IPSET_ERR_EXIST otherwise. */
+static int
+bitmap_ip_timeout_del(struct bitmap_ip_timeout *map, uint32_t id)
+{
+       unsigned long *slots = map->members;
+       /* Sample the state before the slot is cleared */
+       const bool present = bitmap_ip_timeout_test(map, id);
+
+       slots[id] = IPSET_ELEM_UNSET;
+
+       return present ? 0 : -IPSET_ERR_EXIST;
+}
+
+/* Kernel-side test/add/del of the IPv4 address taken from a packet,
+ * timeout variant. Added elements get the set's default timeout.
+ *
+ * Returns -EINVAL for a family other than AF_INET or an unknown adt,
+ * -IPSET_ERR_BITMAP_RANGE if the address is outside first_ip..last_ip,
+ * otherwise the result of bitmap_ip_timeout_test/add/del. */
+static int
+bitmap_ip_timeout_kadt(struct ip_set *set, const struct sk_buff *skb,
+                      enum ipset_adt adt, uint8_t pf, const uint8_t *flags)
+{
+       struct bitmap_ip_timeout *map = set->data;
+       uint32_t ip;
+
+       /* Check the family first so that a non-IPv4 packet is never
+        * interpreted as IPv4 */
+       if (pf != AF_INET)
+               return -EINVAL;
+
+       ip = ntohl(ip4addr(skb, flags));
+
+       if (ip < map->first_ip || ip > map->last_ip)
+               return -IPSET_ERR_BITMAP_RANGE;
+
+       /* From here on ip holds the element index, not the address */
+       ip = ip_to_id((const struct bitmap_ip *)map, ip);
+
+       switch (adt) {
+       case IPSET_TEST:
+               return bitmap_ip_timeout_test(map, ip);
+       case IPSET_ADD:
+               return bitmap_ip_timeout_add(map, ip, map->timeout);
+       case IPSET_DEL:
+               return bitmap_ip_timeout_del(map, ip);
+       default:
+               return -EINVAL;
+       }
+}
+
+/* Userspace (netlink) test/add/del for the timeout variant.
+ *
+ * IPSET_ATTR_IP is mandatory. For add and del an optional IPSET_ATTR_IP_TO
+ * or IPSET_ATTR_CIDR extends the request to a range, which is walked in
+ * steps of map->hosts. IPSET_ATTR_TIMEOUT overrides the set's default
+ * timeout for the elements added by this request. A test request only
+ * looks at IPSET_ATTR_IP.
+ *
+ * Returns 0 (or the test result) on success, or a negative error:
+ * -IPSET_ERR_PROTOCOL on a parse failure or missing IP,
+ * -IPSET_ERR_BITMAP_RANGE if the address or range leaves the set,
+ * -IPSET_ERR_INVALID_CIDR for a prefix longer than 32, or the error of
+ * the first failing add/del. -IPSET_ERR_EXIST does not stop the walk
+ * when IPSET_FLAG_EXIST is set in flags. On a failing element *lineno
+ * is set from IPSET_ATTR_LINENO when that attribute is present. */
+static int
+bitmap_ip_timeout_uadt(struct ip_set *set, struct nlattr *head, int len,
+                      enum ipset_adt adt, uint32_t *lineno, uint32_t flags)
+{
+       struct bitmap_ip_timeout *map = set->data;
+       /* nla_parse() fills tb[0..maxtype], so maxtype+1 slots are needed */
+       struct nlattr *tb[IPSET_ATTR_ADT_MAX+1];
+       bool eexist = flags & IPSET_FLAG_EXIST;
+       uint32_t ip, ip_to, id, timeout = map->timeout;
+       int ret = 0;
 
-               map->first_ip &= map->netmask;  /* Should we better bark? */
+       if (nla_parse(tb, IPSET_ATTR_ADT_MAX, head, len,
+                     bitmap_ip_adt_policy))
+               return -IPSET_ERR_PROTOCOL;
 
-               mask = range_to_mask(map->first_ip, map->last_ip, &mask_bits);
-               netmask_bits = mask_to_bits(map->netmask);
+       if (tb[IPSET_ATTR_IP])
+               ip = ip_set_get_h32(tb[IPSET_ATTR_IP]);
+       else
+               return -IPSET_ERR_PROTOCOL;
 
-               if ((!mask && (map->first_ip || map->last_ip != 0xFFFFFFFF))
-                   || netmask_bits <= mask_bits)
-                       return -ENOEXEC;
+       if (ip < map->first_ip || ip > map->last_ip)
+               return -IPSET_ERR_BITMAP_RANGE;
+       
+       if (adt == IPSET_TEST)
+               return bitmap_ip_timeout_test(map,
+                               ip_to_id((const struct bitmap_ip *)map, ip));
 
-               DP("mask_bits %u, netmask_bits %u",
-                  mask_bits, netmask_bits);
-               map->hosts = 2 << (32 - netmask_bits - 1);
-               map->sizeid = 2 << (netmask_bits - mask_bits - 1);
+       if (tb[IPSET_ATTR_IP_TO]) {
+               ip_to = ip_set_get_h32(tb[IPSET_ATTR_IP_TO]);
+               if (ip > ip_to) {
+                       swap(ip, ip_to);
+                       /* The new upper end was range-checked above;
+                        * only the new lower end is still unchecked */
+                       if (ip < map->first_ip)
+                               return -IPSET_ERR_BITMAP_RANGE;
+               }
+       } else if (tb[IPSET_ATTR_CIDR]) {
+               uint8_t cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]);
+               
+               if (cidr > 32)
+                       return -IPSET_ERR_INVALID_CIDR;
+               ip_to = ip | ~HOSTMASK(cidr);
+       } else
+               ip_to = ip;
+
+       if (ip_to > map->last_ip)
+               return -IPSET_ERR_BITMAP_RANGE;
+       
+       if (tb[IPSET_ATTR_TIMEOUT]) {
+               timeout = ip_set_get_h32(tb[IPSET_ATTR_TIMEOUT]);
        }
-       if (map->sizeid > MAX_RANGE + 1) {
-               ip_set_printk("range too big, %d elements (max %d)",
-                              map->sizeid, MAX_RANGE+1);
-               return -ENOEXEC;
+
+       for (; !before(ip_to, ip); ip += map->hosts) {
+               id = ip_to_id((const struct bitmap_ip *)map, ip);
+               ret = adt == IPSET_ADD
+                       ? bitmap_ip_timeout_add(map, id, timeout)
+                       : bitmap_ip_timeout_del(map, id);
+
+               if (ret && !(ret == -IPSET_ERR_EXIST && eexist)) {
+                       if (tb[IPSET_ATTR_LINENO])
+                               *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
+                       return ret;
+               }
        }
-       DP("hosts %u, sizeid %u", map->hosts, map->sizeid);
-       return bitmap_bytes(0, map->sizeid - 1);
+       return ret;
 }
 
-BITMAP_CREATE(ipmap)
-BITMAP_DESTROY(ipmap)
-BITMAP_FLUSH(ipmap)
+/* Stop the garbage collector timer, then release the members array and
+ * the map and clear set->data. May sleep (msleep). */
+static void
+bitmap_ip_timeout_destroy(struct ip_set *set)
+{
+       struct bitmap_ip_timeout *map = set->data;
 
-static inline void
-__ipmap_list_header(const struct ip_set_ipmap *map,
-                   struct ip_set_req_ipmap_create *header)
+       /* gc might be running: del_timer_sync can't be used */
+       /* The gc handler re-arms the timer itself, so retry until a
+        * pending timer could actually be removed */
+       while (!del_timer(&map->gc))
+               msleep(IPSET_DESTROY_TIMER_SLEEP);
+
+       ip_set_free(map->members, set->flags);
+       kfree(map);
+       
+       set->data = NULL;
+}
+
+/* Remove every element by zeroing all memsize bytes of the slot array.
+ * NOTE(review): relies on an all-zero slot meaning "unset"; confirm
+ * against the definition of IPSET_ELEM_UNSET. */
+static void
+bitmap_ip_timeout_flush(struct ip_set *set)
 {
-       header->netmask = map->netmask;
+       struct bitmap_ip_timeout *map = set->data;
+       
+       memset(map->members, 0, map->memsize);
 }
 
-BITMAP_LIST_HEADER(ipmap)
-BITMAP_LIST_MEMBERS_SIZE(ipmap, ip_set_ip_t, map->sizeid,
-                        test_bit(i, map->members))
+/* Write the set header into skb as one nested IPSET_ATTR_DATA attribute:
+ * first and last IP, the netmask (only when it is not 32), the default
+ * timeout, the number of slots that pass bitmap_ip_timeout_test(), the
+ * reference count minus one and the members size.
+ *
+ * Returns 0 on success and -EFAULT via nla_put_failure, which is reached
+ * when the nest cannot be started and presumably also from the NLA_PUT_*
+ * macros when the skb runs out of room (macro bodies are not visible
+ * here). The nest is not cancelled on that path. */
+static int
+bitmap_ip_timeout_head(struct ip_set *set, struct sk_buff *skb)
+{
+       const struct bitmap_ip_timeout *map = set->data;
+       struct nlattr *nested;
+       uint32_t id, elements;
+       
+       /* Count the members by scanning every slot */
+       for (id = 0, elements = 0; id < map->elements; id++)
+               if (bitmap_ip_timeout_test(map, id))
+                       elements++;
+       
+       nested = ipset_nest_start(skb, IPSET_ATTR_DATA);
+       if (!nested)
+               goto nla_put_failure;
+       NLA_PUT_NET32(skb, IPSET_ATTR_IP, htonl(map->first_ip));
+       NLA_PUT_NET32(skb, IPSET_ATTR_IP_TO, htonl(map->last_ip));
+       if (map->netmask != 32)
+               NLA_PUT_U8(skb, IPSET_ATTR_NETMASK, map->netmask);
+       NLA_PUT_NET32(skb, IPSET_ATTR_TIMEOUT , htonl(map->timeout));
+       NLA_PUT_NET32(skb, IPSET_ATTR_ELEMENTS, htonl(elements));
+       NLA_PUT_NET32(skb, IPSET_ATTR_REFERENCES,
+                     htonl(atomic_read(&set->ref) - 1));
+       NLA_PUT_NET32(skb, IPSET_ATTR_MEMSIZE, htonl(map->memsize));
+       ipset_nest_end(skb, nested);
+       
+       return 0;
+nla_put_failure:
+       return -EFAULT;
+}
+
+/* Dump the set members into skb inside one IPSET_ATTR_ADT nest, one
+ * IPSET_ATTR_DATA nest per member: its IP, the netmask as IPSET_ATTR_CIDR
+ * when it is not 32, and the value of ip_set_timeout_get() for its slot.
+ *
+ * cb->args[2] is the element index to continue from. It is reset to 0
+ * once every slot has been walked. When an element does not fit, the
+ * partial element is cancelled and 0 is returned with cb->args[2] left
+ * at that element, presumably so the next dump call resumes there.
+ * -EFAULT is returned if the outer nest cannot be started, or if the
+ * very first element of this call cannot start its nest. */
+static int
+bitmap_ip_timeout_list(struct ip_set *set,
+                      struct sk_buff *skb, struct netlink_callback *cb)
+{
+       const struct bitmap_ip_timeout *map = set->data;
+       struct nlattr *adt, *nested;
+       uint32_t id, first = cb->args[2];
+       unsigned long *table = map->members;
+       
+       adt = ipset_nest_start(skb, IPSET_ATTR_ADT);
+       if (!adt)
+               return -EFAULT;
+       for (; cb->args[2] < map->elements; cb->args[2]++) {
+               id = cb->args[2];
+               if (!bitmap_ip_timeout_test(map, id))
+                       continue;
+               nested = ipset_nest_start(skb, IPSET_ATTR_DATA);
+               if (!nested) {
+                       /* Nothing was written by this call: give up */
+                       if (id == first) {
+                               nla_nest_cancel(skb, adt);
+                               return -EFAULT;
+                       } else
+                               goto nla_put_failure;
+               }
+               /* Rebuild the address from the element index */
+               NLA_PUT_NET32(skb, IPSET_ATTR_IP,
+                             htonl(map->first_ip + id * map->hosts));
+               if (map->netmask != 32)
+                       NLA_PUT_U8(skb, IPSET_ATTR_CIDR, map->netmask);
+               NLA_PUT_NET32(skb, IPSET_ATTR_TIMEOUT,
+                             htonl(ip_set_timeout_get(table[id])));
+               ipset_nest_end(skb, nested);
+       }
+       ipset_nest_end(skb, adt);
+
+       /* Set listing finished */
+       cb->args[2] = 0;
+       
+       return 0;
+
+nla_put_failure:
+       /* Drop the partial element but keep what was already written */
+       nla_nest_cancel(skb, nested);
+       ipset_nest_end(skb, adt);
+       return 0;
+}
+
+/* Operation table of the timeout variant; bitmap_ip_create() installs it
+ * as set->variant when a timeout attribute is given. */
+static const struct ip_set_type_variant bitmap_ip_timeout __read_mostly = {
+       .kadt   = bitmap_ip_timeout_kadt,
+       .uadt   = bitmap_ip_timeout_uadt,
+       .destroy = bitmap_ip_timeout_destroy,
+       .flush  = bitmap_ip_timeout_flush,
+       .head   = bitmap_ip_timeout_head,
+       .list   = bitmap_ip_timeout_list,
+};
 
+/* Timer handler of the garbage collector: mark every expired slot as
+ * unset, then re-arm the timer for the next period. ul_set is the
+ * struct ip_set pointer stored in gc.data by bitmap_ip_gc_init(). */
 static void
-ipmap_list_members(const struct ip_set *set, void *data, char dont_align)
+bitmap_ip_timeout_gc(unsigned long ul_set)
 {
-       const struct ip_set_ipmap *map = set->data;
-       uint32_t i, n = 0;
-       ip_set_ip_t *d;
+       struct ip_set *set = (struct ip_set *) ul_set;
+       struct bitmap_ip_timeout *map = set->data;
+       unsigned long *table = map->members;
+       uint32_t id;
+
+       /* We run parallel with other readers (test element)
+        * but adding/deleting new entries is locked out */
+       /* NOTE(review): slots are written while only the read lock is
+        * held - confirm that writers take set->lock for writing */
+       read_lock_bh(&set->lock);
+       for (id = 0; id < map->elements; id++)
+               if (ip_set_timeout_expired(table[id]))
+                       table[id] = IPSET_ELEM_UNSET;
+       read_unlock_bh(&set->lock);
+
+       map->gc.expires = jiffies + IPSET_GC_PERIOD(map->timeout) * HZ;
+       add_timer(&map->gc);
+}
+
+/* Set up and start the garbage collector timer of a timeout set.
+ * The set pointer is passed to the handler through gc.data; the first
+ * run is scheduled IPSET_GC_PERIOD(map->timeout) * HZ jiffies from now.
+ * set->data must already point to the bitmap_ip_timeout map. */
+static inline void
+bitmap_ip_gc_init(struct ip_set *set)
+{
+       struct bitmap_ip_timeout *map = set->data;
+
+       init_timer(&map->gc);
+       map->gc.data = (unsigned long) set;
+       map->gc.function = bitmap_ip_timeout_gc;
+       map->gc.expires = jiffies + IPSET_GC_PERIOD(map->timeout) * HZ;
+       add_timer(&map->gc);
+}
+
+/* Create bitmap:ip type of sets */
+
+/* Netlink attribute policy passed to nla_parse() by bitmap_ip_create().
+ * The array has IPSET_ATTR_CREATE_MAX+1 entries, one for each attribute
+ * type 0..IPSET_ATTR_CREATE_MAX. */
+static const struct nla_policy
+bitmap_ip_create_policy[IPSET_ATTR_CREATE_MAX+1] __read_mostly = {
+       [IPSET_ATTR_IP]         = { .type = NLA_U32 },
+       [IPSET_ATTR_IP_TO]      = { .type = NLA_U32 },
+       [IPSET_ATTR_CIDR]       = { .type = NLA_U8 },
+       [IPSET_ATTR_NETMASK]    = { .type = NLA_U8  },
+       [IPSET_ATTR_TIMEOUT]    = { .type = NLA_U32 },
+};
+
+/* Allocate the members array and fill in the fields shared by both
+ * variants, then attach the map to the set and mark it AF_INET.
+ *
+ * map->memsize must be set by the caller beforehand because it is the
+ * allocation size. Returns false if the allocation fails; the set is
+ * left untouched in that case and the caller still owns (and frees) map. */
+static bool
+init_map_ip(struct ip_set *set, struct bitmap_ip *map,
+           uint32_t first_ip, uint32_t last_ip,
+           uint32_t elements, uint32_t hosts, uint8_t netmask)
+{
+       map->members = ip_set_alloc(map->memsize, GFP_KERNEL, &set->flags);
+       if (!map->members)
+               return false;
+       map->first_ip = first_ip;
+       map->last_ip = last_ip;
+       map->elements = elements;
+       map->hosts = hosts;
+       map->netmask = netmask;
+
+       set->data = map;
+       set->family = AF_INET;
        
-       if (dont_align) {
-               memcpy(data, map->members, map->size);
-               return;
+       return true;
+}
+
+/* Create a bitmap:ip set from the netlink attributes of a create request.
+ *
+ * IPSET_ATTR_IP plus either IPSET_ATTR_IP_TO or IPSET_ATTR_CIDR give the
+ * address range. An optional IPSET_ATTR_NETMASK makes every element a
+ * whole subnet. With IPSET_ATTR_TIMEOUT the timeout variant is created
+ * and its garbage collector started, otherwise the plain bitmap variant.
+ *
+ * Returns 0 on success, or a negative error: -IPSET_ERR_PROTOCOL on a
+ * parse failure or a missing mandatory attribute, -IPSET_ERR_INVALID_CIDR,
+ * -IPSET_ERR_INVALID_NETMASK, -IPSET_ERR_BITMAP_RANGE when range and
+ * netmask do not fit together, -IPSET_ERR_BITMAP_RANGE_SIZE when the set
+ * would hold more than IPSET_BITMAP_MAX_RANGE + 1 elements, or -ENOMEM. */
+static int
+bitmap_ip_create(struct ip_set *set, struct nlattr *head, int len,
+                uint32_t flags)
+{
+       /* nla_parse() fills tb[0..maxtype], so maxtype+1 slots are needed */
+       struct nlattr *tb[IPSET_ATTR_CREATE_MAX+1];
+       uint32_t first_ip, last_ip, hosts, elements;
+       uint8_t netmask = 32;
+
+       if (nla_parse(tb, IPSET_ATTR_CREATE_MAX, head, len,
+                     bitmap_ip_create_policy))
+               return -IPSET_ERR_PROTOCOL;
+       
+       if (tb[IPSET_ATTR_IP])
+               first_ip = ip_set_get_h32(tb[IPSET_ATTR_IP]);
+       else
+               return -IPSET_ERR_PROTOCOL;
+
+       if (tb[IPSET_ATTR_IP_TO]) {
+               last_ip = ip_set_get_h32(tb[IPSET_ATTR_IP_TO]);
+               if (first_ip > last_ip)
+                       swap(first_ip, last_ip);
+       } else if (tb[IPSET_ATTR_CIDR]) {
+               uint8_t cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]);
+               
+               if (cidr >= 32)
+                       return -IPSET_ERR_INVALID_CIDR;
+               last_ip = first_ip | ~HOSTMASK(cidr);
+       } else
+               return -IPSET_ERR_PROTOCOL;
+
+       if (tb[IPSET_ATTR_NETMASK]) {
+               netmask = nla_get_u8(tb[IPSET_ATTR_NETMASK]);
+               
+               if (netmask > 32)
+                       return -IPSET_ERR_INVALID_NETMASK;
+
+               /* Widen the range to whole subnets */
+               first_ip &= HOSTMASK(netmask);
+               last_ip |= ~HOSTMASK(netmask);
        }
        
-       for (i = 0; i < map->sizeid; i++)
-               if (test_bit(i, map->members)) {
-                       d = data + n * IPSET_ALIGN(sizeof(ip_set_ip_t));
-                       *d = map->first_ip + i * map->hosts;
-                       n++;
+       if (netmask == 32) {
+               /* Check the span before adding one: for the full
+                * 0.0.0.0-255.255.255.255 range last_ip - first_ip + 1
+                * wraps to 0 and would slip past the size limit below */
+               if (last_ip - first_ip > IPSET_BITMAP_MAX_RANGE)
+                       return -IPSET_ERR_BITMAP_RANGE_SIZE;
+               hosts = 1;
+               elements = last_ip - first_ip + 1;
+       } else {
+               uint8_t mask_bits;
+               uint32_t mask;
+
+               mask = range_to_mask(first_ip, last_ip, &mask_bits);
+
+               /* The range has to be a single network that is wider
+                * than the element netmask */
+               if ((!mask && (first_ip || last_ip != 0xFFFFFFFF))
+                   || netmask <= mask_bits)
+                       return -IPSET_ERR_BITMAP_RANGE;
+
+               D("mask_bits %u, netmask %u", mask_bits, netmask);
+               hosts = 2 << (32 - netmask - 1);
+               elements = 2 << (netmask - mask_bits - 1);
+       }
+       if (elements > IPSET_BITMAP_MAX_RANGE + 1) {
+               return -IPSET_ERR_BITMAP_RANGE_SIZE;
+       }
+       D("hosts %u, elements %u", hosts, elements);
+
+       if (tb[IPSET_ATTR_TIMEOUT]) {
+               struct bitmap_ip_timeout *map;
+               
+               map = kzalloc(sizeof(*map), GFP_KERNEL);
+               if (!map)
+                       return -ENOMEM;
+               
+               /* One unsigned long timeout slot per element */
+               map->memsize = elements * sizeof(unsigned long);
+                              
+               if (!init_map_ip(set, (struct bitmap_ip *)map,
+                                first_ip, last_ip,
+                                elements, hosts, netmask)) {
+                       kfree(map);
+                       return -ENOMEM;
+               }
+
+               map->timeout = ip_set_get_h32(tb[IPSET_ATTR_TIMEOUT]);
+               set->flags |= IP_SET_FLAG_TIMEOUT;
+               set->variant = &bitmap_ip_timeout;
+               
+               bitmap_ip_gc_init(set);
+       } else {
+               struct bitmap_ip *map;
+               
+               map = kzalloc(sizeof(*map), GFP_KERNEL);
+               if (!map)
+                       return -ENOMEM;
+               
+               /* One bit per element */
+               map->memsize = bitmap_bytes(0, elements - 1);
+
+               if (!init_map_ip(set, map,
+                                first_ip, last_ip,
+                                elements, hosts, netmask)) {
+                       kfree(map);
+                       return -ENOMEM;
                }
+
+               set->variant = &bitmap_ip;
+       }
+       return 0;
 }
 
-IP_SET_TYPE(ipmap, IPSET_TYPE_IP | IPSET_DATA_SINGLE)
+/* Descriptor of the "bitmap:ip" set type, registered with the ipset core
+ * by bitmap_ip_init(): IPv4 only, revision 0, sets are built by
+ * bitmap_ip_create(). */
+static struct ip_set_type bitmap_ip_type = {
+       .name           = "bitmap:ip",
+       .protocol       = IPSET_PROTOCOL,
+       .features       = IPSET_TYPE_IP,
+       .family         = AF_INET,
+       .revision       = 0,
+       .create         = bitmap_ip_create,
+       .me             = THIS_MODULE,
+};
 
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
-MODULE_DESCRIPTION("ipmap type of IP sets");
+/* Module init: register the bitmap:ip type; returns the result of
+ * ip_set_type_register(). */
+static int __init
+bitmap_ip_init(void)
+{
+       return ip_set_type_register(&bitmap_ip_type);
+}
+
+/* Module exit: unregister the bitmap:ip type. */
+static void __exit
+bitmap_ip_fini(void)
+{
+       ip_set_type_unregister(&bitmap_ip_type);
+}
 
-REGISTER_MODULE(ipmap)
+module_init(bitmap_ip_init);
+module_exit(bitmap_ip_fini);
index 89e907b309f96a18d0f718faed06bbb3ed6878e1..45335dd4e152a0b95a06031b73785663f229d8ea 100644 (file)
@@ -1,14 +1,14 @@
 /* Copyright (C) 2000-2002 Joakim Axelsson <gozem@linux.nu>
  *                         Patrick Schaaf <bof@bof.de>
- *                         Martin Josefsson <gandalf@wlug.westbo.se>
- * Copyright (C) 2003-2008 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
+ *                        Martin Josefsson <gandalf@wlug.westbo.se>
+ * Copyright (C) 2003-2010 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
  * published by the Free Software Foundation.
  */
 
-/* Kernel module implementing an IP set type: the macipmap type */
+/* Kernel module implementing an IP set type: the bitmap:ip,mac type */
 
 #include <linux/module.h>
 #include <linux/ip.h>
 #include <asm/bitops.h>
 #include <linux/spinlock.h>
 #include <linux/if_ether.h>
+#include <linux/netlink.h>
+#include <linux/delay.h>
+#include <linux/jiffies.h>
+#include <linux/timer.h>
+#include <net/netlink.h>
+#include <net/pfxlen.h>
 
-#include <linux/netfilter_ipv4/ip_set_macipmap.h>
+#include <linux/netfilter/ip_set.h>
+#include <linux/netfilter/ip_set_timeout.h>
+#include <linux/netfilter/ip_set_bitmap.h>
 
-static int
-macipmap_utest(struct ip_set *set, const void *data, u_int32_t size)
-{
-       const struct ip_set_macipmap *map = set->data;
-       const struct ip_set_macip *table = map->members;        
-       const struct ip_set_req_macipmap *req = data;
-
-       if (req->ip < map->first_ip || req->ip > map->last_ip)
-               return -ERANGE;
-
-       DP("set: %s, ip:%u.%u.%u.%u", set->name, HIPQUAD(req->ip));             
-       if (table[req->ip - map->first_ip].match) {
-               return (memcmp(req->ethernet,
-                              &table[req->ip - map->first_ip].ethernet,
-                              ETH_ALEN) == 0);
-       } else {
-               return (map->flags & IPSET_MACIP_MATCHUNSET ? 1 : 0);
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
+MODULE_DESCRIPTION("bitmap:ip,mac type of IP sets");
+MODULE_ALIAS("ip_set_bitmap:ip,mac");
+
+enum {
+       MAC_EMPTY,              /* element is not set */
+       MAC_FILLED,             /* element is set with MAC */
+       MAC_UNSET,              /* element is set, without MAC */
+};
+
+/* Member element without and with timeout */
+
+struct ipmac {
+       unsigned char ether[ETH_ALEN];
+       unsigned char match;
+};
+
+struct ipmac_timeout {
+       unsigned char ether[ETH_ALEN];
+       unsigned char match;
+       unsigned long timeout;
+};
+
+struct bitmap_ipmac {
+       void *members;          /* the set members */
+       uint32_t first_ip;      /* host byte order, included in range */
+       uint32_t last_ip;       /* host byte order, included in range */
+       uint32_t timeout;       /* timeout value */
+       struct timer_list gc;   /* garbage collector */
+       size_t elem_size;       /* size of element */
+};
+
+/* Locate the member slot with index @id: the members area is treated as
+ * an array of records, each map->elem_size bytes long. */
+static inline void *
+bitmap_ipmac_elem(const struct bitmap_ipmac *map, uint32_t id)
+{
+       char *base = map->members;
+
+       return &base[id * map->elem_size];
+}
+
+/* Apply ip_set_timeout_test() to the timeout stored in slot @id.
+ * The slot is read as a struct ipmac_timeout, so this is meaningful only
+ * for maps whose elements carry the timeout field. */
+static inline bool
+bitmap_timeout(const struct bitmap_ipmac *map, uint32_t id)
+{
+       const struct ipmac_timeout *slot;
+
+       slot = bitmap_ipmac_elem(map, id);
+       return ip_set_timeout_test(slot->timeout);
+}
+
+/* Apply ip_set_timeout_expired() to the timeout stored in slot @id.
+ * The slot is read as a struct ipmac_timeout, so this is meaningful only
+ * for maps whose elements carry the timeout field. */
+static inline bool
+bitmap_expired(const struct bitmap_ipmac *map, uint32_t id)
+{
+       const struct ipmac_timeout *slot;
+
+       slot = bitmap_ipmac_elem(map, id);
+       return ip_set_timeout_expired(slot->timeout);
+}
+
+/* Tell whether @elem counts as a member of the set.
+ *
+ * An element added without MAC address (MAC_UNSET) always counts.
+ * An element with MAC address (MAC_FILLED) counts unless the set uses
+ * timeouts (@with_timeout) and the element has already expired.
+ * Returns 1 if the element exists, 0 otherwise. */
+static inline int
+bitmap_ipmac_exist(const struct ipmac *elem, bool with_timeout)
+{
+       const struct ipmac_timeout *timed;
+
+       if (elem->match == MAC_UNSET)
+               return 1;
+       if (elem->match != MAC_FILLED)
+               return 0;
+       if (!with_timeout)
+               return 1;
+
+       /* The timeout field is looked at for timeout sets only */
+       timed = (const struct ipmac_timeout *) elem;
+       return !ip_set_timeout_expired(timed->timeout);
+}
+
+/* Test the element at index @id.
+ *
+ * Returns -EAGAIN if the element was added without MAC address,
+ * non-zero if the element holds a MAC address which matches @ether
+ * (a NULL @ether matches any address) and, for timeout sets,
+ * bitmap_timeout() reports true for it; 0 in every other case. */
+static inline int
+bitmap_ipmac_test(const struct bitmap_ipmac *map, bool with_timeout,
+                 uint32_t id, const unsigned char *ether)
+{
+       const struct ipmac *elem = bitmap_ipmac_elem(map, id);
+
+       switch (elem->match) {
+       case MAC_UNSET:
+               /* Trigger kernel to fill out the ethernet address */
+               return -EAGAIN;
+       case MAC_FILLED:
+               return (ether == NULL
+                       || memcmp(ether, elem->ether, ETH_ALEN) == 0)
+                      && (!with_timeout || bitmap_timeout(map, id));
        }
+       return 0;
 }
 
+/* Add the element at index @id, optionally with the MAC address @ether.
+ *
+ * The action depends on the current state of the slot:
+ *  - MAC_UNSET:  the element was added without MAC address. With a NULL
+ *                @ether it is a duplicate (-IPSET_ERR_EXIST), otherwise
+ *                the address is stored and the timeout is started.
+ *  - MAC_FILLED: duplicate (-IPSET_ERR_EXIST), unless the set uses
+ *                timeouts and the element expired: then it is handled
+ *                like an empty slot.
+ *  - MAC_EMPTY:  the element is stored, with or without MAC address.
+ *
+ * Returns 0 on success. */
 static int
-macipmap_ktest(struct ip_set *set,
-              const struct sk_buff *skb,
-              const u_int32_t *flags)
+bitmap_ipmac_add(struct bitmap_ipmac *map, bool with_timeout,
+                uint32_t id, const unsigned char *ether,
+                uint32_t timeout)
 {
-       const struct ip_set_macipmap *map = set->data;
-       const struct ip_set_macip *table = map->members;
-       ip_set_ip_t ip;
-       
-       ip = ipaddr(skb, flags);
+       struct ipmac *elem = bitmap_ipmac_elem(map, id);
+       /* Same slot seen with the timeout field; used for timeout sets only */
+       struct ipmac_timeout *e = (struct ipmac_timeout *) elem;
 
-       if (ip < map->first_ip || ip > map->last_ip)
-               return 0;
-
-       DP("set: %s, ip:%u.%u.%u.%u", set->name, HIPQUAD(ip));
-       if (table[ip - map->first_ip].match) {
-               /* Is mac pointer valid?
-                * If so, compare... */
-               return (skb_mac_header(skb) >= skb->head
-                       && (skb_mac_header(skb) + ETH_HLEN) <= skb->data
-                       && (memcmp(eth_hdr(skb)->h_source,
-                                  &table[ip - map->first_ip].ethernet,
-                                  ETH_ALEN) == 0));
-       } else {
-               return (map->flags & IPSET_MACIP_MATCHUNSET ? 1 : 0);
+       switch (elem->match) {
+       case MAC_UNSET:
+               if (!ether)
+                       /* Already added without ethernet address */
+                       return -IPSET_ERR_EXIST;
+               /* Fill the MAC address and activate the timer */
+               memcpy(elem->ether, ether, ETH_ALEN);
+               elem->match = MAC_FILLED;
+               if (with_timeout) {
+                       if (timeout == map->timeout)
+                               /* Timeout was not specified, get stored one */
+                               timeout = e->timeout;
+                       e->timeout = ip_set_timeout_set(timeout);
+               }
+               break;
+       case MAC_FILLED:
+               if (!(with_timeout && bitmap_expired(map, id)))
+                       return -IPSET_ERR_EXIST;
+               /* Fall through */
+       case MAC_EMPTY:
+               if (ether) {
+                       memcpy(elem->ether, ether, ETH_ALEN);
+                       elem->match = MAC_FILLED;
+               } else
+                       elem->match = MAC_UNSET;
+               if (with_timeout) {
+                       /* If MAC is unset yet, we store plain timeout
+                        * because the timer is not activated yet
+                        * and we can reuse it later when MAC is filled out,
+                        * possibly by the kernel */
+                       e->timeout = ether ? ip_set_timeout_set(timeout)
+                                          : timeout;
+               }
+               break;
        }
+
+       return 0;
 }
 
-/* returns 0 on success */
-static inline int
-macipmap_add(struct ip_set *set,
-            ip_set_ip_t ip, const unsigned char *ethernet)
+/* Delete the element at index @id by marking its slot MAC_EMPTY.
+ * Returns -IPSET_ERR_EXIST if the slot is already empty or, for timeout
+ * sets, bitmap_expired() reports the element as expired; 0 otherwise. */
+static int
+bitmap_ipmac_del(struct bitmap_ipmac *map, bool with_timeout,
+                uint32_t id)
 {
-       struct ip_set_macipmap *map = set->data;
-       struct ip_set_macip *table = map->members;
+       struct ipmac *elem = bitmap_ipmac_elem(map, id);
 
-       if (ip < map->first_ip || ip > map->last_ip)
-               return -ERANGE;
-       if (table[ip - map->first_ip].match)
-               return -EEXIST;
+       if (elem->match == MAC_EMPTY
+           || (with_timeout && bitmap_expired(map, id)))
+               return -IPSET_ERR_EXIST;
+
+       elem->match = MAC_EMPTY;
 
-       DP("set: %s, ip: %u.%u.%u.%u", set->name, HIPQUAD(ip));
-       memcpy(&table[ip - map->first_ip].ethernet, ethernet, ETH_ALEN);
-       table[ip - map->first_ip].match = IPSET_MACIP_ISSET;
        return 0;
 }
 
-#define KADT_CONDITION                                         \
-       if (!(skb_mac_header(skb) >= skb->head                  \
-             && (skb_mac_header(skb) + ETH_HLEN) <= skb->data))\
+/* Kernel side add/del/test: the element is built from the packet @skb.
+ *
+ * The IP address is picked by ip4addr() according to @flags, the MAC
+ * address is the source address of the ethernet header of the packet.
+ * Only AF_INET is accepted. Elements are added with the default timeout
+ * of the set.
+ *
+ * Returns -EINVAL for a wrong family, an unusable MAC header or an
+ * unknown @adt, -IPSET_ERR_BITMAP_RANGE if the address is outside of the
+ * range of the set, otherwise the result of the test/add/del helper. */
+static int
+bitmap_ipmac_kadt(struct ip_set *set, const struct sk_buff *skb,
+                 enum ipset_adt adt, uint8_t pf, const uint8_t *flags)
+{
+       struct bitmap_ipmac *map = set->data;
+       uint32_t ip = ntohl(ip4addr(skb, flags));
+       bool with_timeout = set->flags & IP_SET_FLAG_TIMEOUT;
+
+       if (pf != AF_INET)
                return -EINVAL;
 
-UADT(macipmap, add, req->ethernet)
-KADT(macipmap, add, ipaddr, eth_hdr(skb)->h_source)
+       if (ip < map->first_ip || ip > map->last_ip)
+               return -IPSET_ERR_BITMAP_RANGE;
 
-static inline int
-macipmap_del(struct ip_set *set, ip_set_ip_t ip)
+       /* The whole ethernet header must lie between skb->head and
+        * skb->data, otherwise there is no MAC address to look at */
+       if (skb_mac_header(skb) < skb->head
+           || (skb_mac_header(skb) + ETH_HLEN) > skb->data)
+               return -EINVAL;
+
+       /* From here on ip is the index of the element in the map */
+       ip -= map->first_ip;
+
+       switch (adt) {
+       case IPSET_TEST:
+               return bitmap_ipmac_test(map, with_timeout,
+                                        ip, eth_hdr(skb)->h_source);
+       case IPSET_ADD:
+               return bitmap_ipmac_add(map, with_timeout,
+                                       ip, eth_hdr(skb)->h_source,
+                                       map->timeout);
+       case IPSET_DEL:
+               return bitmap_ipmac_del(map, with_timeout, ip);
+       default:
+               return -EINVAL;
+       }
+}
+
+static const struct nla_policy
+bitmap_ipmac_adt_policy[IPSET_ATTR_ADT_MAX + 1] __read_mostly = {
+       [IPSET_ATTR_IP]         = { .type = NLA_U32 },
+       [IPSET_ATTR_ETHER]      = { .type = NLA_BINARY, .len  = ETH_ALEN },
+       [IPSET_ATTR_TIMEOUT]    = { .type = NLA_U32 },
+};
+
+/* Userspace (netlink) add/del/test handler of the bitmap:ip,mac type.
+ *
+ * Parses the attributes in @head/@len, checks that the IP address is
+ * inside the range of the set and dispatches according to @adt.
+ * A timeout attribute is accepted only by sets created with timeout;
+ * a MAC address attribute must be exactly ETH_ALEN bytes long.
+ *
+ * Returns the result of bitmap_ipmac_test() for IPSET_TEST; otherwise
+ * 0 on success or a negative error code. -IPSET_ERR_EXIST is not
+ * reported when IPSET_FLAG_EXIST is set in @flags. When an error is
+ * reported, *@lineno is filled out from IPSET_ATTR_LINENO if that
+ * attribute is present. */
+static int
+bitmap_ipmac_uadt(struct ip_set *set, struct nlattr *head, int len,
+                 enum ipset_adt adt, uint32_t *lineno, uint32_t flags)
 {
-       struct ip_set_macipmap *map = set->data;
-       struct ip_set_macip *table = map->members;
+       struct bitmap_ipmac *map = set->data;
+       /* nla_parse() fills out the slots 0..maxtype: maxtype + 1 elements */
+       struct nlattr *tb[IPSET_ATTR_ADT_MAX + 1];
+       bool eexist = flags & IPSET_FLAG_EXIST;
+       bool with_timeout = set->flags & IP_SET_FLAG_TIMEOUT;
+       uint32_t ip, timeout = map->timeout;
+       unsigned char *ether = NULL;
+       int ret = 0;
+
+       if (nla_parse(tb, IPSET_ATTR_ADT_MAX, head, len,
+                     bitmap_ipmac_adt_policy))
+               return -IPSET_ERR_PROTOCOL;
+
+       if (tb[IPSET_ATTR_IP])
+               ip = ip_set_get_h32(tb[IPSET_ATTR_IP]);
+       else
+               return -IPSET_ERR_PROTOCOL;
 
        if (ip < map->first_ip || ip > map->last_ip)
-               return -ERANGE;
-       if (!table[ip - map->first_ip].match)
-               return -EEXIST;
+               return -IPSET_ERR_BITMAP_RANGE;
 
-       table[ip - map->first_ip].match = 0;
-       DP("set: %s, ip: %u.%u.%u.%u", set->name, HIPQUAD(ip));
-       return 0;
+       if (tb[IPSET_ATTR_ETHER]) {
+               /* The NLA_BINARY policy limits the maximal length only,
+                * but ETH_ALEN bytes are going to be read from the data */
+               if (nla_len(tb[IPSET_ATTR_ETHER]) != ETH_ALEN)
+                       return -IPSET_ERR_PROTOCOL;
+               ether = nla_data(tb[IPSET_ATTR_ETHER]);
+       }
+
+       if (tb[IPSET_ATTR_TIMEOUT]) {
+               if (!with_timeout)
+                       return -IPSET_ERR_TIMEOUT;
+               timeout = ip_set_get_h32(tb[IPSET_ATTR_TIMEOUT]);
+       }
+
+       /* From here on ip is the index of the element in the map */
+       ip -= map->first_ip;
+
+       if (adt == IPSET_TEST)
+               return bitmap_ipmac_test(map, with_timeout, ip, ether);
+
+       ret = adt == IPSET_ADD ? bitmap_ipmac_add(map, with_timeout,
+                                                 ip, ether, timeout)
+                              : bitmap_ipmac_del(map, with_timeout, ip);
+
+       /* "Already added"/"not added" is not an error if the caller
+        * asked for it to be tolerated */
+       if (ret == -IPSET_ERR_EXIST && eexist)
+               return 0;
+
+       if (ret && tb[IPSET_ATTR_LINENO])
+               *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
+       return ret;
 }
 
-#undef KADT_CONDITION
-#define KADT_CONDITION
+/* Release everything owned by the set: stop the garbage collector timer
+ * of timeout sets, then free the members area and the map itself. */
+static void
+bitmap_ipmac_destroy(struct ip_set *set)
+{
+       struct bitmap_ipmac *map = set->data;
 
-UADT(macipmap, del)
-KADT(macipmap, del, ipaddr)
+       /* gc might be running: del_timer_sync can't be used */
+       if (set->flags & IP_SET_FLAG_TIMEOUT)
+               /* Sleep and retry until del_timer() reports success */
+               while (!del_timer(&map->gc))
+                       msleep(IPSET_DESTROY_TIMER_SLEEP);
+       
+       ip_set_free(map->members, set->flags);
+       kfree(map);
+       
+       set->data = NULL;
+}
 
-static inline int
-__macipmap_create(const struct ip_set_req_macipmap_create *req,
-                 struct ip_set_macipmap *map)
+/* Forget every member: the whole members area is zeroed, which leaves
+ * every slot with match == 0, i.e. MAC_EMPTY. */
+static void
+bitmap_ipmac_flush(struct ip_set *set)
+{
+       struct bitmap_ipmac *map = set->data;
+       uint32_t slots = map->last_ip - map->first_ip + 1;
+
+       memset(map->members, 0, slots * map->elem_size);
+}
+
+/* Dump the header of the set into @skb as an IPSET_ATTR_DATA nest:
+ * the IP range, the number of existing elements, the reference count,
+ * the size of the members area and, for timeout sets, the default
+ * timeout. Returns 0 on success, -EFAULT if an attribute could not be
+ * put into the message. */
+static int
+bitmap_ipmac_head(struct ip_set *set, struct sk_buff *skb)
 {
-       if (req->to - req->from > MAX_RANGE) {
-               ip_set_printk("range too big, %d elements (max %d)",
-                             req->to - req->from + 1, MAX_RANGE+1);
-               return -ENOEXEC;
+       const struct bitmap_ipmac *map = set->data;
+       struct nlattr *nested;
+       const struct ipmac *elem;
+       uint32_t id, elements = 0, last = map->last_ip - map->first_ip;
+       bool with_timeout = set->flags & IP_SET_FLAG_TIMEOUT;
+
+       /* Count the elements which exist (see bitmap_ipmac_exist) */
+       for (id = 0; id <= last; id++) {
+               elem = bitmap_ipmac_elem(map, id);
+               if (bitmap_ipmac_exist(elem, with_timeout))
+                       elements++;
        }
-       map->flags = req->flags;
-       return (req->to - req->from + 1) * sizeof(struct ip_set_macip);
+
+       nested = ipset_nest_start(skb, IPSET_ATTR_DATA);
+       if (!nested)
+               goto nla_put_failure;
+       /* The NLA_PUT* macros jump to nla_put_failure on error */
+       NLA_PUT_NET32(skb, IPSET_ATTR_IP, htonl(map->first_ip));
+       NLA_PUT_NET32(skb, IPSET_ATTR_IP_TO, htonl(map->last_ip));
+       NLA_PUT_NET32(skb, IPSET_ATTR_ELEMENTS, htonl(elements));
+       NLA_PUT_NET32(skb, IPSET_ATTR_REFERENCES,
+                     htonl(atomic_read(&set->ref) - 1));
+       NLA_PUT_NET32(skb, IPSET_ATTR_MEMSIZE,
+                     htonl((map->last_ip - map->first_ip + 1)
+                           * map->elem_size));
+       if (with_timeout)
+               NLA_PUT_NET32(skb, IPSET_ATTR_TIMEOUT, htonl(map->timeout));
+       ipset_nest_end(skb, nested);
+       
+       return 0;
+nla_put_failure:
+       return -EFAULT;
 }
 
-BITMAP_CREATE(macipmap)
-BITMAP_DESTROY(macipmap)
-BITMAP_FLUSH(macipmap)
+/* Dump the elements of the set into @skb as an IPSET_ATTR_ADT nest.
+ *
+ * cb->args[2] is the index of the next element to dump: it is advanced
+ * while listing and reset to zero when the whole set has been listed.
+ * If the message gets full, the partially written element is cancelled
+ * and cb->args[2] is left at that element
+ * (presumably so that the next call continues from there - TODO confirm
+ * against the caller).
+ *
+ * Returns -EFAULT if not even the first nest could be started,
+ * 0 otherwise. */
+static int
+bitmap_ipmac_list(struct ip_set *set,
+                 struct sk_buff *skb, struct netlink_callback *cb)
+{
+       const struct bitmap_ipmac *map = set->data;
+       const struct ipmac *elem;
+       struct nlattr *atd, *nested;
+       uint32_t id, first = cb->args[2];
+       uint32_t last = map->last_ip - map->first_ip;
+       bool with_timeout = set->flags & IP_SET_FLAG_TIMEOUT;
+
+       atd = ipset_nest_start(skb, IPSET_ATTR_ADT);
+       if (!atd)
+               return -EFAULT;
+       for (; cb->args[2] <= last; cb->args[2]++) {
+               id = cb->args[2];
+               elem = bitmap_ipmac_elem(map, id);
+               if (!bitmap_ipmac_exist(elem, with_timeout)) 
+                       continue;
+               nested = ipset_nest_start(skb, IPSET_ATTR_DATA);
+               if (!nested) {
+                       /* Nothing was dumped in this call: give up */
+                       if (id == first) {
+                               nla_nest_cancel(skb, atd);
+                               return -EFAULT;
+                       } else
+                               goto nla_put_failure;
+               }
+               NLA_PUT_NET32(skb, IPSET_ATTR_IP,
+                             htonl(map->first_ip + id));
+               if (elem->match == MAC_FILLED)
+                       NLA_PUT(skb, IPSET_ATTR_ETHER, ETH_ALEN,
+                               elem->ether);
+               if (with_timeout) {
+                       const struct ipmac_timeout *e =
+                               (const struct ipmac_timeout *)elem;
+                       /* Elements without MAC store the plain timeout
+                        * value, the others are converted by
+                        * ip_set_timeout_get() */
+                       uint32_t timeout = e->match == MAC_UNSET ? e->timeout
+                                       : ip_set_timeout_get(e->timeout);
+       
+                       NLA_PUT_NET32(skb, IPSET_ATTR_TIMEOUT,
+                                     htonl(timeout));
+               }
+               ipset_nest_end(skb, nested);
+       }
+       ipset_nest_end(skb, atd);
+       /* Set listing finished */
+       cb->args[2] = 0;
+       
+       return 0;
+
+nla_put_failure:
+       /* Drop the incomplete element but keep the ones already dumped */
+       nla_nest_cancel(skb, nested);
+       ipset_nest_end(skb, atd);
+       return 0;
+}
+
+const struct ip_set_type_variant bitmap_ipmac __read_mostly = {
+       .kadt   = bitmap_ipmac_kadt,
+       .uadt   = bitmap_ipmac_uadt,
+       .destroy = bitmap_ipmac_destroy,
+       .flush  = bitmap_ipmac_flush,
+       .head   = bitmap_ipmac_head,
+       .list   = bitmap_ipmac_list,
+};
+
+/* Garbage collector, the timer function of map->gc.
+ * @ul_set is the struct ip_set pointer stored in the timer data.
+ * Every element with MAC address whose timeout expired is marked empty,
+ * then the timer is started again with the period computed by
+ * IPSET_GC_PERIOD() from the default timeout of the set. */
+static void
+bitmap_ipmac_timeout_gc(unsigned long ul_set)
+{
+       struct ip_set *set = (struct ip_set *) ul_set;
+       struct bitmap_ipmac *map = set->data;
+       struct ipmac_timeout *elem;
+       uint32_t id, last = map->last_ip - map->first_ip;
+       
+       /* We run parallel with other readers (test element)
+        * but adding/deleting new entries is locked out */
+       read_lock_bh(&set->lock);
+       for (id = 0; id <= last; id++) {
+               elem = bitmap_ipmac_elem(map, id);
+               /* Elements without MAC (MAC_UNSET) are left alone */
+               if (elem->match == MAC_FILLED
+                   && ip_set_timeout_expired(elem->timeout))
+                       elem->match = MAC_EMPTY;
+       }
+       read_unlock_bh(&set->lock);
+
+       /* Re-arm the timer for the next run */
+       map->gc.expires = jiffies + IPSET_GC_PERIOD(map->timeout) * HZ;
+       add_timer(&map->gc);
+}
 
 static inline void
-__macipmap_list_header(const struct ip_set_macipmap *map,
-                      struct ip_set_req_macipmap_create *header)
+bitmap_ipmac_timeout_gc_init(struct ip_set *set)
 {
-       header->flags = map->flags;
+       struct bitmap_ipmac *map = set->data;
+
+       init_timer(&map->gc);
+       map->gc.data = (unsigned long) set;
+       map->gc.function = bitmap_ipmac_timeout_gc;
+       map->gc.expires = jiffies + IPSET_GC_PERIOD(map->timeout) * HZ;
+       add_timer(&map->gc);
 }
 
-BITMAP_LIST_HEADER(macipmap)
-BITMAP_LIST_MEMBERS_SIZE(macipmap, struct ip_set_req_macipmap,
-                        (map->last_ip - map->first_ip + 1),
-                        ((const struct ip_set_macip *)map->members)[i].match)
+/* Create bitmap:ip,mac type of sets */
 
+/* Attributes accepted by bitmap_ipmac_create(). The range is given either
+ * as IP - IP_TO or as IP/CIDR: the CIDR attribute is read there by
+ * nla_get_u8(), therefore its length must be validated here as well. */
+static const struct nla_policy
+bitmap_ipmac_create_policy[IPSET_ATTR_CREATE_MAX+1] __read_mostly = {
+       [IPSET_ATTR_IP]         = { .type = NLA_U32 },
+       [IPSET_ATTR_IP_TO]      = { .type = NLA_U32 },
+       [IPSET_ATTR_CIDR]       = { .type = NLA_U8 },
+       [IPSET_ATTR_TIMEOUT]    = { .type = NLA_U32 },
+};
 
-static void
-macipmap_list_members(const struct ip_set *set, void *data, char dont_align)
+static bool
+init_map_ipmac(struct ip_set *set, struct bitmap_ipmac *map,
+              uint32_t first_ip, uint32_t last_ip)
 {
-       const struct ip_set_macipmap *map = set->data;
-       const struct ip_set_macip *table = map->members;
-       uint32_t i, n = 0;
-       struct ip_set_req_macipmap *d;
+       map->members = ip_set_alloc((last_ip - first_ip + 1) * map->elem_size,
+                                   GFP_KERNEL, &set->flags);
+       if (!map->members)
+               return false;
+       map->first_ip = first_ip;
+       map->last_ip = last_ip;
+
+       set->data = map;
+       set->family = AF_INET;
        
-       if (dont_align) {
-               memcpy(data, map->members, map->size);
-               return;
-       }
+       return true;
+}
+
+/* Create a bitmap:ip,mac set from the netlink attributes in @head/@len.
+ *
+ * The range is given by IPSET_ATTR_IP together with either
+ * IPSET_ATTR_IP_TO (the two values are swapped if needed) or
+ * IPSET_ATTR_CIDR. If IPSET_ATTR_TIMEOUT is present, the elements carry
+ * a timeout and the garbage collector is started.
+ *
+ * Returns 0 on success, -IPSET_ERR_PROTOCOL for missing or malformed
+ * attributes, -IPSET_ERR_INVALID_CIDR for a bad prefix length,
+ * -IPSET_ERR_BITMAP_RANGE_SIZE if the range has more than
+ * IPSET_BITMAP_MAX_RANGE + 1 elements and -ENOMEM if the allocation
+ * failed. */
+static int
+bitmap_ipmac_create(struct ip_set *set, struct nlattr *head, int len,
+                   uint32_t flags)
+{
+       /* nla_parse() fills out the slots 0..maxtype: maxtype + 1 elements */
+       struct nlattr *tb[IPSET_ATTR_CREATE_MAX + 1];
+       uint32_t first_ip, last_ip;
+       uint64_t elements;
+       struct bitmap_ipmac *map;
+
+       if (nla_parse(tb, IPSET_ATTR_CREATE_MAX, head, len,
+                     bitmap_ipmac_create_policy))
+               return -IPSET_ERR_PROTOCOL;
        
-       for (i = 0; i < map->last_ip - map->first_ip + 1; i++)
-               if (table[i].match) {
-                       d = data + n * IPSET_ALIGN(sizeof(struct ip_set_req_macipmap));
-                       d->ip = map->first_ip + i;
-                       memcpy(d->ethernet, &table[i].ethernet, ETH_ALEN);
-                       n++;
+       if (tb[IPSET_ATTR_IP])
+               first_ip = ip_set_get_h32(tb[IPSET_ATTR_IP]);
+       else
+               return -IPSET_ERR_PROTOCOL;
+
+       if (tb[IPSET_ATTR_IP_TO]) {
+               last_ip = ip_set_get_h32(tb[IPSET_ATTR_IP_TO]);
+               if (first_ip > last_ip) {
+                       uint32_t tmp = first_ip;
+
+                       first_ip = last_ip;
+                       last_ip = tmp;
                }
+       } else if (tb[IPSET_ATTR_CIDR]) {
+               uint8_t cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]);
+
+               if (cidr >= 32)
+                       return -IPSET_ERR_INVALID_CIDR;
+               last_ip = first_ip | ~HOSTMASK(cidr);
+       } else
+               return -IPSET_ERR_PROTOCOL;
+
+       /* Counted in 64 bits: for the 0.0.0.0 - 255.255.255.255 range
+        * a 32 bit counter wraps to zero and slips through the check */
+       elements = (uint64_t) last_ip - first_ip + 1;
+
+       if (elements > IPSET_BITMAP_MAX_RANGE + 1)
+               return -IPSET_ERR_BITMAP_RANGE_SIZE;
+
+       set->variant = &bitmap_ipmac;
+
+       map = kzalloc(sizeof(*map), GFP_KERNEL);
+       if (!map)
+               return -ENOMEM;
+
+       /* The element size must be known before the members are allocated */
+       map->elem_size = tb[IPSET_ATTR_TIMEOUT] ? sizeof(struct ipmac_timeout)
+                                               : sizeof(struct ipmac);
+
+       if (!init_map_ipmac(set, map, first_ip, last_ip)) {
+               kfree(map);
+               return -ENOMEM;
+       }
+
+       if (tb[IPSET_ATTR_TIMEOUT]) {
+               map->timeout = ip_set_get_h32(tb[IPSET_ATTR_TIMEOUT]);
+               set->flags |= IP_SET_FLAG_TIMEOUT;
+
+               /* The garbage collector reads map->timeout for its period */
+               bitmap_ipmac_timeout_gc_init(set);
+       }
+       return 0;
 }
 
-IP_SET_TYPE(macipmap, IPSET_TYPE_IP | IPSET_DATA_SINGLE)
+struct ip_set_type bitmap_ipmac_type = {
+       .name           = "bitmap:ip,mac",
+       .protocol       = IPSET_PROTOCOL,
+       .features       = IPSET_TYPE_IP,
+       .family         = AF_INET,
+       .revision       = 0,
+       .create         = bitmap_ipmac_create,
+       .me             = THIS_MODULE,
+};
 
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
-MODULE_DESCRIPTION("macipmap type of IP sets");
+static int __init
+bitmap_ipmac_init(void)
+{
+       return ip_set_type_register(&bitmap_ipmac_type);
+}
+
+static void __exit
+bitmap_ipmac_fini(void)
+{
+       ip_set_type_unregister(&bitmap_ipmac_type);
+}
 
-REGISTER_MODULE(macipmap)
+module_init(bitmap_ipmac_init);
+module_exit(bitmap_ipmac_fini);
index 8bb6e7602f957387c713e1adb569d0d5c5f169c8..3afd0312eae78a6648107f00c6f4bc26e77139f8 100644 (file)
@@ -1,11 +1,11 @@
-/* Copyright (C) 2003-2008 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
+/* Copyright (C) 2003-2010 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
  * published by the Free Software Foundation.
  */
 
-/* Kernel module implementing a port set type as a bitmap */
+/* Kernel module implementing an IP set type: the bitmap:port type */
 
 #include <linux/module.h>
 #include <linux/ip.h>
 #include <asm/uaccess.h>
 #include <asm/bitops.h>
 #include <linux/spinlock.h>
+#include <linux/netlink.h>
+#include <linux/delay.h>
+#include <linux/jiffies.h>
+#include <linux/timer.h>
+#include <net/netlink.h>
+#include <net/pfxlen.h>
 
-#include <net/ip.h>
+#include <linux/netfilter/ip_set.h>
+#include <linux/netfilter/ip_set_bitmap.h>
+#include <linux/netfilter/ip_set_getport.h>
+#define IP_SET_BITMAP_TIMEOUT
+#include <linux/netfilter/ip_set_timeout.h>
 
-#include <linux/netfilter_ipv4/ip_set_portmap.h>
-#include <linux/netfilter_ipv4/ip_set_getport.h>
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
+MODULE_DESCRIPTION("bitmap:port type of IP sets");
+MODULE_ALIAS("ip_set_bitmap:port");
+
+/* Base variant */
+
+struct bitmap_port {
+       void *members;          /* the set members */
+       uint16_t first_port;    /* host byte order, included in range */
+       uint16_t last_port;     /* host byte order, included in range */
+       size_t memsize;         /* members size */
+};
 
+/* Return 1 if bit @id is set in the members bitmap, 0 otherwise.
+ * @id is the index of the port in the bitmap, not the port number. */
 static inline int
-portmap_test(const struct ip_set *set, ip_set_ip_t port)
+bitmap_port_test(const struct bitmap_port *map, uint16_t id)
 {
-       const struct ip_set_portmap *map = set->data;
+       return !!test_bit(id, map->members);
+}
 
-       if (port < map->first_ip || port > map->last_ip)
-               return -ERANGE;
-               
-       DP("set: %s, port: %u", set->name, port);
-       return !!test_bit(port - map->first_ip, map->members);
+/* Set bit @id in the members bitmap.
+ * Returns 0 if the bit was clear before, -IPSET_ERR_EXIST if it was
+ * already set. */
+static inline int
+bitmap_port_add(struct bitmap_port *map, uint16_t id)
+{
+       return test_and_set_bit(id, map->members) ? -IPSET_ERR_EXIST : 0;
 }
 
-#define KADT_CONDITION                 \
-       if (ip == INVALID_PORT)         \
-               return 0;       
+/* Clear bit @id in the members bitmap.
+ * Returns 0 if the bit was set before, -IPSET_ERR_EXIST if it was
+ * already clear. */
+static int
+bitmap_port_del(struct bitmap_port *map, uint16_t id)
+{
+       int was_set = test_and_clear_bit(id, map->members);
 
-UADT(portmap, test)
-KADT(portmap, test, get_port)
+       return was_set ? 0 : -IPSET_ERR_EXIST;
+}
 
-static inline int
-portmap_add(struct ip_set *set, ip_set_ip_t port)
+/* Kernel side add/del/test: the port is taken from the packet @skb by
+ * get_port() according to @pf and @flags.
+ *
+ * Returns 0 if get_port() reports IPSET_INVALID_PORT,
+ * -IPSET_ERR_BITMAP_RANGE if the port is outside of the range of the
+ * set, -EINVAL for an unknown @adt, otherwise the result of the
+ * test/add/del helper. */
+static int
+bitmap_port_kadt(struct ip_set *set, const struct sk_buff *skb,
+                enum ipset_adt adt, uint8_t pf, const uint8_t *flags)
 {
-       struct ip_set_portmap *map = set->data;
+       struct bitmap_port *map = set->data;
+       uint32_t port = get_port(pf, skb, flags);
+       
+       if (port == IPSET_INVALID_PORT)
+               return 0;
+       
+       port = ntohs(port);
 
-       if (port < map->first_ip || port > map->last_ip)
-               return -ERANGE;
-       if (test_and_set_bit(port - map->first_ip, map->members))
-               return -EEXIST;
+       if (port < map->first_port || port > map->last_port)
+               return -IPSET_ERR_BITMAP_RANGE;
        
-       DP("set: %s, port %u", set->name, port);
-       return 0;
+       /* From here on port is the index of the bit in the bitmap */
+       port -= map->first_port;
+
+       switch (adt) {
+       case IPSET_TEST:
+               return bitmap_port_test(map, port);
+       case IPSET_ADD:
+               return bitmap_port_add(map, port);
+       case IPSET_DEL:
+               return bitmap_port_del(map, port);
+       default:
+               return -EINVAL;
+       }
 }
 
-UADT(portmap, add)
-KADT(portmap, add, get_port)
+static const struct nla_policy
+bitmap_port_adt_policy[IPSET_ATTR_ADT_MAX+1] __read_mostly = {
+       [IPSET_ATTR_PORT]       = { .type = NLA_U16 },
+       [IPSET_ATTR_PORT_TO]    = { .type = NLA_U16 },
+       [IPSET_ATTR_TIMEOUT]    = { .type = NLA_U32 },
+};
 
-static inline int
-portmap_del(struct ip_set *set, ip_set_ip_t port)
+/* Userspace (netlink) add/del/test handler of the bitmap:port type.
+ *
+ * IPSET_TEST checks the single port in IPSET_ATTR_PORT. Add and delete
+ * work on the port or, if IPSET_ATTR_PORT_TO is present too, on the
+ * whole range (the boundaries are swapped if needed). The timeout
+ * attribute is rejected by this variant.
+ *
+ * Returns the result of bitmap_port_test() for IPSET_TEST; otherwise
+ * 0 on success or a negative error code. -IPSET_ERR_EXIST is not
+ * reported when IPSET_FLAG_EXIST is set in @flags. When an error is
+ * reported, the operation stops at the failing port and *@lineno is
+ * filled out from IPSET_ATTR_LINENO if that attribute is present. */
+static int
+bitmap_port_uadt(struct ip_set *set, struct nlattr *head, int len,
+                enum ipset_adt adt, uint32_t *lineno, uint32_t flags)
 {
-       struct ip_set_portmap *map = set->data;
+       struct bitmap_port *map = set->data;
+       /* nla_parse() fills out the slots 0..maxtype: maxtype + 1 elements */
+       struct nlattr *tb[IPSET_ATTR_ADT_MAX + 1];
+       bool eexist = flags & IPSET_FLAG_EXIST;
+       /* 32 bit wide, so that the loop below terminates for port 65535 */
+       uint32_t port;
+       uint16_t id, port_to;
+       int ret = 0;
+
+       if (nla_parse(tb, IPSET_ATTR_ADT_MAX, head, len,
+                     bitmap_port_adt_policy))
+               return -IPSET_ERR_PROTOCOL;
 
-       if (port < map->first_ip || port > map->last_ip)
-               return -ERANGE;
-       if (!test_and_clear_bit(port - map->first_ip, map->members))
-               return -EEXIST;
+       if (tb[IPSET_ATTR_PORT])
+               port = ip_set_get_h16(tb[IPSET_ATTR_PORT]);
+       else
+               return -IPSET_ERR_PROTOCOL;
        
-       DP("set: %s, port %u", set->name, port);
-       return 0;
+       if (port < map->first_port || port > map->last_port)
+               return -IPSET_ERR_BITMAP_RANGE;
+
+       if (tb[IPSET_ATTR_TIMEOUT])
+               return -IPSET_ERR_TIMEOUT;
+
+       if (adt == IPSET_TEST)
+               return bitmap_port_test(map, port - map->first_port);
+
+       if (tb[IPSET_ATTR_PORT_TO]) {
+               port_to = ip_set_get_h16(tb[IPSET_ATTR_PORT_TO]);
+               if (port > port_to) {
+                       swap(port, port_to);
+                       /* The new lower boundary was not checked yet */
+                       if (port < map->first_port)
+                               return -IPSET_ERR_BITMAP_RANGE;
+               }
+       } else
+               port_to = port;
+
+       if (port_to > map->last_port)
+               return -IPSET_ERR_BITMAP_RANGE;
+
+       for (; port <= port_to; port++) {
+               id = port - map->first_port;
+               ret = adt == IPSET_ADD ? bitmap_port_add(map, id)
+                                      : bitmap_port_del(map, id);
+
+               if (ret && !(ret == -IPSET_ERR_EXIST && eexist)) {
+                       if (tb[IPSET_ATTR_LINENO])
+                               *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
+                       return ret;
+               }
+               /* A tolerated result must not become the return value
+                * just because it belongs to the last port of the range */
+               ret = 0;
+       }
+       return ret;
 }
 
-UADT(portmap, del)
-KADT(portmap, del, get_port)
+static void
+bitmap_port_destroy(struct ip_set *set)
+{
+       struct bitmap_port *map = set->data;
+       
+       ip_set_free(map->members, set->flags);
+       kfree(map);
+       
+       set->data = NULL;
+}
 
-static inline int
-__portmap_create(const struct ip_set_req_portmap_create *req,
-                struct ip_set_portmap *map)
+static void
+bitmap_port_flush(struct ip_set *set)
 {
-       if (req->to - req->from > MAX_RANGE) {
-               ip_set_printk("range too big, %d elements (max %d)",
-                             req->to - req->from + 1, MAX_RANGE+1);
-               return -ENOEXEC;
+       struct bitmap_port *map = set->data;
+       
+       memset(map->members, 0, map->memsize);
+}
+
+/* Dump the header of the set into @skb as an IPSET_ATTR_DATA nest:
+ * the port range, the number of ports in the set, the reference count
+ * and the size of the bitmap. Returns 0 on success, -EFAULT if an
+ * attribute could not be put into the message. */
+static int
+bitmap_port_head(struct ip_set *set, struct sk_buff *skb)
+{
+       struct bitmap_port *map = set->data;
+       struct nlattr *nested;
+       /* Up to 65536 ports can be set: the counter must not be 16 bit
+        * wide, otherwise a completely filled 0-65535 set reports zero */
+       uint32_t id, elements = 0;
+       uint16_t last = map->last_port - map->first_port;
+
+       for (id = 0; id <= last; id++)
+               if (test_bit(id, map->members))
+                       elements++;
+
+       nested = ipset_nest_start(skb, IPSET_ATTR_DATA);
+       if (!nested)
+               goto nla_put_failure;
+       /* The NLA_PUT* macros jump to nla_put_failure on error */
+       NLA_PUT_NET16(skb, IPSET_ATTR_PORT, htons(map->first_port));
+       NLA_PUT_NET16(skb, IPSET_ATTR_PORT_TO, htons(map->last_port));
+       NLA_PUT_NET32(skb, IPSET_ATTR_ELEMENTS, htonl(elements));
+       NLA_PUT_NET32(skb, IPSET_ATTR_REFERENCES,
+                     htonl(atomic_read(&set->ref) - 1));
+       NLA_PUT_NET32(skb, IPSET_ATTR_MEMSIZE, htonl(map->memsize));
+       ipset_nest_end(skb, nested);
+
+       return 0;
+nla_put_failure:
+       return -EFAULT;
+}
+
+/* Dump the ports of the set into @skb as an IPSET_ATTR_ADT nest.
+ *
+ * cb->args[2] is the index of the next bit to look at: it is advanced
+ * while listing and reset to zero when the whole set has been listed.
+ * If the message gets full, the partially written element is cancelled
+ * and cb->args[2] is left at that element
+ * (presumably so that the next call continues from there - TODO confirm
+ * against the caller).
+ *
+ * Returns -EFAULT if not even the first nest could be started,
+ * 0 otherwise. */
+static int
+bitmap_port_list(struct ip_set *set,
+                struct sk_buff *skb, struct netlink_callback *cb)
+{
+       struct bitmap_port *map = set->data;
+       struct nlattr *atd, *nested;
+       uint16_t id, first = cb->args[2];
+       uint16_t last = map->last_port - map->first_port;
+
+       atd = ipset_nest_start(skb, IPSET_ATTR_ADT);
+       if (!atd)
+               return -EFAULT;
+       for (; cb->args[2] <= last; cb->args[2]++) {
+               id = cb->args[2];
+               if (!test_bit(id, map->members)) 
+                       continue;
+               nested = ipset_nest_start(skb, IPSET_ATTR_DATA);
+               if (!nested) {
+                       /* Nothing was dumped in this call: give up */
+                       if (id == first) {
+                               nla_nest_cancel(skb, atd);
+                               return -EFAULT;
+                       } else
+                               goto nla_put_failure;
+               }
+               NLA_PUT_NET16(skb, IPSET_ATTR_PORT,
+                             htons(map->first_port + id));
+               ipset_nest_end(skb, nested);
        }
-       return bitmap_bytes(req->from, req->to);
+       ipset_nest_end(skb, atd);
+       /* Set listing finished */
+       cb->args[2] = 0;
+       
+       return 0;
+
+nla_put_failure:
+       /* Drop the incomplete element but keep the ones already dumped */
+       nla_nest_cancel(skb, nested);
+       ipset_nest_end(skb, atd);
+       return 0;
 }
 
-BITMAP_CREATE(portmap)
-BITMAP_DESTROY(portmap)
-BITMAP_FLUSH(portmap)
+const struct ip_set_type_variant bitmap_port __read_mostly = {
+       .kadt   = bitmap_port_kadt,
+       .uadt   = bitmap_port_uadt,
+       .destroy = bitmap_port_destroy,
+       .flush  = bitmap_port_flush,
+       .head   = bitmap_port_head,
+       .list   = bitmap_port_list,
+};
 
-static inline void
-__portmap_list_header(const struct ip_set_portmap *map,
-                     struct ip_set_req_portmap_create *header)
+/* Timeout variant */
+
+struct bitmap_port_timeout {
+       void *members;          /* the set members */
+       uint16_t first_port;    /* host byte order, included in range */
+       uint16_t last_port;     /* host byte order, included in range */
+       size_t memsize;         /* members size */
+
+       uint32_t timeout;       /* timeout parameter */
+       struct timer_list gc;   /* garbage collection */
+};
+
+static inline bool
+bitmap_port_timeout_test(const struct bitmap_port_timeout *map, uint16_t id)
+{
+       unsigned long *timeout = map->members;
+
+       return ip_set_timeout_test(timeout[id]);
+}
+
+/* Store the element with the given timeout.
+ * Returns -IPSET_ERR_EXIST (leaving the slot untouched) when
+ * bitmap_port_timeout_test() already reports the slot as set,
+ * zero otherwise. */
+static int
+bitmap_port_timeout_add(const struct bitmap_port_timeout *map,
+                       uint16_t id, uint32_t timeout)
+{
+       unsigned long *slots = map->members;
+       bool present = bitmap_port_timeout_test(map, id);
+
+       if (!present)
+               slots[id] = ip_set_timeout_set(timeout);
+
+       return present ? -IPSET_ERR_EXIST : 0;
+}
+
+/* Remove the element: the slot is reset to IPSET_ELEM_UNSET in every
+ * case. Returns zero when bitmap_port_timeout_test() reported the slot
+ * as set beforehand, -IPSET_ERR_EXIST otherwise. */
+static int
+bitmap_port_timeout_del(const struct bitmap_port_timeout *map,
+                       uint16_t id)
+{
+       unsigned long *slots = map->members;
+       bool present = bitmap_port_timeout_test(map, id);
+
+       slots[id] = IPSET_ELEM_UNSET;
+
+       return present ? 0 : -IPSET_ERR_EXIST;
+}
+
+/* Kernel side add/del/test: the port is taken from the packet by
+ * get_port(). Returns zero when no valid port could be extracted,
+ * -IPSET_ERR_BITMAP_RANGE when the port is outside of the set,
+ * -EINVAL for an unknown operation, otherwise the result of the
+ * requested operation. */
+static int
+bitmap_port_timeout_kadt(struct ip_set *set, const struct sk_buff *skb,
+                        enum ipset_adt adt, uint8_t pf, const uint8_t *flags)
+{
+       struct bitmap_port_timeout *map = set->data;
+       uint32_t raw = get_port(pf, skb, flags);
+       uint32_t port;
+       uint16_t id;
+
+       if (raw == IPSET_INVALID_PORT)
+               return 0;
+
+       /* Range limits are kept in host byte order */
+       port = ntohs(raw);
+       if (!(port >= map->first_port && port <= map->last_port))
+               return -IPSET_ERR_BITMAP_RANGE;
+
+       /* Index of the port inside the member table */
+       id = port - map->first_port;
+
+       if (adt == IPSET_TEST)
+               return bitmap_port_timeout_test(map, id);
+       if (adt == IPSET_ADD)
+               return bitmap_port_timeout_add(map, id, map->timeout);
+       if (adt == IPSET_DEL)
+               return bitmap_port_timeout_del(map, id);
+
+       return -EINVAL;
+}
+
+/* Add, delete or test ports from userspace (timeout variant).
+ *
+ * IPSET_ATTR_PORT is mandatory. IPSET_ATTR_PORT_TO optionally turns an
+ * add/del request into a range (a test checks the first port alone)
+ * and IPSET_ATTR_TIMEOUT overrides the default timeout of the set.
+ * With IPSET_FLAG_EXIST in flags, -IPSET_ERR_EXIST results are ignored.
+ * Returns zero (or the result of the test) on success, a negative
+ * error code otherwise; on a failed add/del *lineno is filled in from
+ * IPSET_ATTR_LINENO when that attribute is present. */
+static int
+bitmap_port_timeout_uadt(struct ip_set *set, struct nlattr *head, int len,
+                        enum ipset_adt adt, uint32_t *lineno, uint32_t flags)
 {
+       const struct bitmap_port_timeout *map = set->data;
+       /* nla_parse() fills in tb[0..maxtype]: maxtype + 1 slots needed */
+       struct nlattr *tb[IPSET_ATTR_ADT_MAX+1];
+       bool eexist = flags & IPSET_FLAG_EXIST;
+       uint16_t port_to, id;
+       uint32_t port, timeout = map->timeout;
+       int ret = 0;
+
+       if (nla_parse(tb, IPSET_ATTR_ADT_MAX, head, len,
+                     bitmap_port_adt_policy))
+               return -IPSET_ERR_PROTOCOL;
+
+       if (tb[IPSET_ATTR_PORT])
+               port = ip_set_get_h16(tb[IPSET_ATTR_PORT]);
+       else
+               return -IPSET_ERR_PROTOCOL;
+
+       if (port < map->first_port || port > map->last_port)
+               return -IPSET_ERR_BITMAP_RANGE;
+
+       if (adt == IPSET_TEST)
+               return bitmap_port_timeout_test(map, port - map->first_port);
+
+       if (tb[IPSET_ATTR_PORT_TO]) {
+               port_to = ip_set_get_h16(tb[IPSET_ATTR_PORT_TO]);
+               if (port > port_to) {
+                       swap(port, port_to);
+                       /* The new lower bound was not checked yet */
+                       if (port < map->first_port)
+                               return -IPSET_ERR_BITMAP_RANGE;
+               }
+       } else
+               port_to = port;
+
+       if (port_to > map->last_port)
+               return -IPSET_ERR_BITMAP_RANGE;
+
+       if (tb[IPSET_ATTR_TIMEOUT])
+               timeout = ip_set_get_h32(tb[IPSET_ATTR_TIMEOUT]);
+
+       /* port is 32 bit wide, so the loop terminates for port_to == 65535 */
+       for (; port <= port_to; port++) {
+               id = port - map->first_port;
+               ret = adt == IPSET_ADD
+                       ? bitmap_port_timeout_add(map, id, timeout)
+                       : bitmap_port_timeout_del(map, id);
+
+               if (ret == -IPSET_ERR_EXIST && eexist) {
+                       /* Tolerated: must not leak out as the final
+                        * result when it happens on the last element */
+                       ret = 0;
+                       continue;
+               }
+               if (ret) {
+                       if (tb[IPSET_ATTR_LINENO])
+                               *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
+                       return ret;
+               }
+       }
+       return ret;
 }
 
-BITMAP_LIST_HEADER(portmap)
-BITMAP_LIST_MEMBERS_SIZE(portmap, ip_set_ip_t, (map->last_ip - map->first_ip + 1),
-                        test_bit(i, map->members))
+/* Tear down a timeout set: stop the garbage collector timer, then free
+ * the member table and the type-private data. */
+static void
+bitmap_port_timeout_destroy(struct ip_set *set)
+{
+       struct bitmap_port_timeout *map = set->data;
+
+       /* gc might be running: del_timer_sync can't be used */
+       /* Retry, sleeping in between, until del_timer() reports success;
+        * presumably that means the self re-arming timer was caught
+        * while pending - confirm against the timer API */
+       while (!del_timer(&map->gc))
+               msleep(IPSET_DESTROY_TIMER_SLEEP);
+
+       ip_set_free(map->members, set->flags);
+       kfree(map);
+       
+       set->data = NULL;
+}
 
 static void
-portmap_list_members(const struct ip_set *set, void *data, char dont_align)
+bitmap_port_timeout_flush(struct ip_set *set)
 {
-       const struct ip_set_portmap *map = set->data;
-       uint32_t i, n = 0;
-       ip_set_ip_t *d;
+       struct bitmap_port_timeout *map = set->data;
        
-       if (dont_align) {
-               memcpy(data, map->members, map->size);
-               return;
+       memset(map->members, 0, map->memsize);
+}
+
+/* Emit the header of a timeout set (port range, default timeout, number
+ * of elements, references and memory size) as a nested IPSET_ATTR_DATA
+ * attribute.
+ * Returns 0 on success, -EFAULT when building the attributes fails. */
+static int
+bitmap_port_timeout_head(struct ip_set *set, struct sk_buff *skb)
+{
+       struct bitmap_port_timeout *map = set->data;
+       struct nlattr *nested;
+       uint16_t last = map->last_port - map->first_port;
+       /* A full 0-65535 range holds 65536 members: a 16 bit counter
+        * would wrap around to zero, so count in 32 bits */
+       uint32_t id, elements = 0;
+
+       for (id = 0; id <= last; id++)
+               if (bitmap_port_timeout_test(map, id))
+                       elements++;
+
+       nested = ipset_nest_start(skb, IPSET_ATTR_DATA);
+       if (!nested)
+               goto nla_put_failure;
+       NLA_PUT_NET16(skb, IPSET_ATTR_PORT, htons(map->first_port));
+       NLA_PUT_NET16(skb, IPSET_ATTR_PORT_TO, htons(map->last_port));
+       NLA_PUT_NET32(skb, IPSET_ATTR_TIMEOUT , htonl(map->timeout));
+       NLA_PUT_NET32(skb, IPSET_ATTR_ELEMENTS, htonl(elements));
+       NLA_PUT_NET32(skb, IPSET_ATTR_REFERENCES,
+                     htonl(atomic_read(&set->ref) - 1));
+       NLA_PUT_NET32(skb, IPSET_ATTR_MEMSIZE, htonl(map->memsize));
+       ipset_nest_end(skb, nested);
+
+       return 0;
+nla_put_failure:
+       return -EFAULT;
+}
+
+static int
+bitmap_port_timeout_list(struct ip_set *set,
+                        struct sk_buff *skb, struct netlink_callback *cb)
+{
+       struct bitmap_port_timeout *map = set->data;
+       struct nlattr *adt, *nested;
+       uint16_t id, first = cb->args[2];
+       uint16_t last = map->last_port - map->first_port;
+       unsigned long *table = map->members;
+       
+       adt = ipset_nest_start(skb, IPSET_ATTR_ADT);
+       if (!adt)
+               return -EFAULT;
+       for (; cb->args[2] <= last; cb->args[2]++) {
+               id = cb->args[2];
+               if (!bitmap_port_timeout_test(map, id))
+                       continue;
+               nested = ipset_nest_start(skb, IPSET_ATTR_DATA);
+               if (!nested) {
+                       if (id == first) {
+                               nla_nest_cancel(skb, adt);
+                               return -EFAULT;
+                       } else
+                               goto nla_put_failure;
+               }
+               NLA_PUT_NET16(skb, IPSET_ATTR_PORT,
+                             htons(map->first_port + id));
+               NLA_PUT_NET32(skb, IPSET_ATTR_TIMEOUT,
+                             htonl(ip_set_timeout_get(table[id])));
+               ipset_nest_end(skb, nested);
        }
+       ipset_nest_end(skb, adt);
+
+       /* Set listing finished */
+       cb->args[2] = 0;
        
-       for (i = 0; i < map->last_ip - map->first_ip + 1; i++)
-               if (test_bit(i, map->members)) {
-                       d = data + n * IPSET_ALIGN(sizeof(ip_set_ip_t));
-                       *d = map->first_ip + i;
-                       n++;
+       return 0;
+
+nla_put_failure:
+       nla_nest_cancel(skb, nested);
+       ipset_nest_end(skb, adt);
+       return 0;
+}
+
+const struct ip_set_type_variant bitmap_port_timeout __read_mostly = {
+       .kadt   = bitmap_port_timeout_kadt,
+       .uadt   = bitmap_port_timeout_uadt,
+       .destroy = bitmap_port_timeout_destroy,
+       .flush  = bitmap_port_timeout_flush,
+       .head   = bitmap_port_timeout_head,
+       .list   = bitmap_port_timeout_list,
+};
+
+/* Garbage collector timer callback; ul_set carries the struct ip_set
+ * pointer. Resets every expired slot to IPSET_ELEM_UNSET, then re-arms
+ * the timer for the next period. */
+static void
+bitmap_port_timeout_gc(unsigned long ul_set)
+{
+       struct ip_set *set = (struct ip_set *) ul_set;
+       struct bitmap_port_timeout *map = set->data;
+       unsigned long *table = map->members;
+       uint16_t id, last = map->last_port - map->first_port;
+       
+       /* We run parallel with other readers (test element)
+        * but adding/deleting new entries is locked out */
+       /* NOTE(review): slots are rewritten while only the read lock is
+        * held; confirm that add/del really take the write lock.
+        * NOTE(review): id is 16 bit wide, so "id <= last" can never
+        * become false when last is 65535 (range 0-65535) - confirm */
+       read_lock_bh(&set->lock);
+       for (id = 0; id <= last; id++)
+               if (ip_set_timeout_expired(table[id]))
+                       table[id] = IPSET_ELEM_UNSET;
+       read_unlock_bh(&set->lock);
+
+       /* The callback re-arms itself for the next collection round */
+       map->gc.expires = jiffies + IPSET_GC_PERIOD(map->timeout) * HZ;
+       add_timer(&map->gc);
+}
+
+static inline void
+bitmap_port_timeout_gc_init(struct ip_set *set)
+{
+       struct bitmap_port_timeout *map = set->data;
+
+       init_timer(&map->gc);
+       map->gc.data = (unsigned long) set;
+       map->gc.function = bitmap_port_timeout_gc;
+       map->gc.expires = jiffies + IPSET_GC_PERIOD(map->timeout) * HZ;
+       add_timer(&map->gc);
+}
+
+/* Create bitmap:port type of sets */
+
+static const struct nla_policy
+bitmap_port_create_policy[IPSET_ATTR_CREATE_MAX+1] __read_mostly = {
+       [IPSET_ATTR_PORT]       = { .type = NLA_U16 },
+       [IPSET_ATTR_PORT_TO]    = { .type = NLA_U16 },
+       [IPSET_ATTR_TIMEOUT]    = { .type = NLA_U32 },
+};
+
+/* Allocate the member area (map->memsize bytes, set by the caller) and
+ * bind the map to the set. Returns false when the allocation fails;
+ * the map itself is left for the caller to free. */
+static bool
+init_map_port(struct ip_set *set, struct bitmap_port *map,
+             uint16_t first_port, uint16_t last_port)
+{
+       void *members = ip_set_alloc(map->memsize, GFP_KERNEL, &set->flags);
+
+       map->members = members;
+       if (members == NULL)
+               return false;
+
+       /* The port range is stored as received from the caller */
+       map->last_port = last_port;
+       map->first_port = first_port;
+
+       set->family = AF_UNSPEC;
+       set->data = map;
+
+       return true;
+}
+
+/* Create a bitmap:port set from the netlink attributes in head.
+ *
+ * IPSET_ATTR_PORT and IPSET_ATTR_PORT_TO are mandatory and are swapped
+ * when given in reverse order. When IPSET_ATTR_TIMEOUT is present the
+ * timeout variant is set up (one unsigned long per port, garbage
+ * collector started), otherwise the plain bitmap variant.
+ * Returns 0 on success, -IPSET_ERR_PROTOCOL on malformed or missing
+ * attributes and -ENOMEM when an allocation fails. */
+static int
+bitmap_port_create(struct ip_set *set, struct nlattr *head, int len,
+                uint32_t flags)
+{
+       /* nla_parse() fills in tb[0..maxtype]: maxtype + 1 slots needed */
+       struct nlattr *tb[IPSET_ATTR_CREATE_MAX+1];
+       uint16_t first_port, last_port;
+
+       if (nla_parse(tb, IPSET_ATTR_CREATE_MAX, head, len,
+                     bitmap_port_create_policy))
+               return -IPSET_ERR_PROTOCOL;
+
+       if (tb[IPSET_ATTR_PORT])
+               first_port = ip_set_get_h16(tb[IPSET_ATTR_PORT]);
+       else
+               return -IPSET_ERR_PROTOCOL;
+
+       if (tb[IPSET_ATTR_PORT_TO]) {
+               last_port = ip_set_get_h16(tb[IPSET_ATTR_PORT_TO]);
+               if (first_port > last_port) {
+                       uint16_t tmp = first_port;
+
+                       first_port = last_port;
+                       last_port = tmp;
+               }
+       } else
+               return -IPSET_ERR_PROTOCOL;
+
+       if (tb[IPSET_ATTR_TIMEOUT]) {
+               struct bitmap_port_timeout *map;
+
+               map = kzalloc(sizeof(*map), GFP_KERNEL);
+               if (!map)
+                       return -ENOMEM;
+
+               /* One timeout value per port of the range */
+               map->memsize = (last_port - first_port + 1)
+                              * sizeof(unsigned long);
+
+               /* The leading members of the two map structures match */
+               if (!init_map_port(set, (struct bitmap_port *) map,
+                                  first_port, last_port)) {
+                       kfree(map);
+                       return -ENOMEM;
+               }
+
+               map->timeout = ip_set_get_h32(tb[IPSET_ATTR_TIMEOUT]);
+               set->flags |= IP_SET_FLAG_TIMEOUT;
+               set->variant = &bitmap_port_timeout;
+
+               bitmap_port_timeout_gc_init(set);
+       } else {
+               struct bitmap_port *map;
+
+               map = kzalloc(sizeof(*map), GFP_KERNEL);
+               if (!map)
+                       return -ENOMEM;
+
+               map->memsize = bitmap_bytes(0, last_port - first_port);
+               D("memsize: %zu", map->memsize);
+               if (!init_map_port(set, map, first_port, last_port)) {
+                       kfree(map);
+                       return -ENOMEM;
                }
+
+               set->variant = &bitmap_port;
+       }
+       return 0;
 }
 
-IP_SET_TYPE(portmap, IPSET_TYPE_PORT | IPSET_DATA_SINGLE)
+struct ip_set_type bitmap_port_type = {
+       .name           = "bitmap:port",
+       .protocol       = IPSET_PROTOCOL,
+       .features       = IPSET_TYPE_PORT,
+       .family         = AF_UNSPEC,
+       .revision       = 0,
+       .create         = bitmap_port_create,
+       .me             = THIS_MODULE,
+};
 
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
-MODULE_DESCRIPTION("portmap type of IP sets");
+static int __init
+bitmap_port_init(void)
+{
+       return ip_set_type_register(&bitmap_port_type);
+}
+
+static void __exit
+bitmap_port_fini(void)
+{
+       ip_set_type_unregister(&bitmap_port_type);
+}
 
-REGISTER_MODULE(portmap)
+module_init(bitmap_port_init);
+module_exit(bitmap_port_fini);
index 1accbe306c2acb4ab0e78bf52a7af2960b9e4b17..d99c99b4dbc3eb70c5deacdd7bc013e7d0f1fbe8 100644 (file)
-/* Copyright (C) 2003-2008 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
+/* Copyright (C) 2003-2010 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
  * published by the Free Software Foundation.
  */
 
-/* Kernel module implementing an ip hash set */
+/* Kernel module implementing an IP set type: the hash:ip type */
 
 #include <linux/module.h>
-#include <linux/moduleparam.h>
 #include <linux/ip.h>
 #include <linux/skbuff.h>
-#include <linux/netfilter_ipv4/ip_set_jhash.h>
 #include <linux/errno.h>
 #include <asm/uaccess.h>
 #include <asm/bitops.h>
 #include <linux/spinlock.h>
 #include <linux/random.h>
-
 #include <net/ip.h>
+#include <net/ipv6.h>
+#include <net/netlink.h>
+#include <net/pfxlen.h>
+
+#include <linux/netfilter.h>
+#include <linux/netfilter/ip_set.h>
+#include <linux/netfilter/ip_set_timeout.h>
+#include <linux/netfilter/ip_set_hash.h>
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
+MODULE_DESCRIPTION("hash:ip type of IP sets");
+MODULE_ALIAS("ip_set_hash:ip");
+
+/* Member elements without timeout */
+struct ip4_elem {
+       uint32_t ip;
+};
+
+struct ip6_elem {
+       union nf_inet_addr ip;
+};
+
+/* Member elements with timeout support */
+struct ip4_elem_timeout {
+       uint32_t ip;
+       unsigned long timeout;
+};
 
-#include <linux/netfilter_ipv4/ip_set_iphash.h>
+struct ip6_elem_timeout {
+       union nf_inet_addr ip;
+       unsigned long timeout;
+};
 
-static int limit = MAX_RANGE;
+/* The hash:ip type structure */
+struct hash_ip {
+       void *members;                  /* the set members */
+       uint32_t hashsize;              /* hash size */
+       uint32_t maxelem;               /* max number of elements/hashsize */
+       uint8_t probes;                 /* max number of probes  */
+       uint8_t resize;                 /* resize factor in percent */
+       uint8_t netmask;                /* netmask */
+       uint32_t timeout;               /* timeout value */
+       uint32_t elements;              /* number of elements */
+       struct timer_list gc;           /* garbage collector */
+       size_t elem_size;               /* size of element */
+       initval_t initval[0];           /* initvals for jhash_1word */
+};
 
-static inline __u32
-iphash_id(struct ip_set *set, ip_set_ip_t ip)
+static inline void *
+hash_ip_elem(const struct hash_ip *map, uint32_t id)
 {
-       struct ip_set_iphash *map = set->data;
-       __u32 id;
-       u_int16_t i;
-       ip_set_ip_t *elem;
+       return (void *)((char *)map->members + id * map->elem_size);
+}
 
+static inline unsigned long
+get_ip4_elem_timeout(const struct ip4_elem *elem)
+{
+       return ((const struct ip4_elem_timeout *)elem)->timeout;
+}
 
-       ip &= map->netmask;     
-       DP("set: %s, ip:%u.%u.%u.%u", set->name, HIPQUAD(ip));
-       for (i = 0; i < map->probes; i++) {
-               id = jhash_ip(map, i, ip) % map->hashsize;
-               DP("hash key: %u", id);
-               elem = HARRAY_ELEM(map->members, ip_set_ip_t *, id);
-               if (*elem == ip)
-                       return id;
-               /* No shortcut - there can be deleted entries. */
-       }
-       return UINT_MAX;
+static inline unsigned long
+get_ip6_elem_timeout(const struct ip6_elem *elem)
+{
+       return ((const struct ip6_elem_timeout *)elem)->timeout;
+}
+
+static inline uint32_t
+ip4_hash(struct ip4_elem *elem, initval_t initval, uint32_t hashsize)
+{
+       return jhash_1word(elem->ip, initval) % hashsize;
+}
+
+static inline uint32_t
+ip6_hash(struct ip6_elem *elem, initval_t initval, uint32_t hashsize)
+{
+       return jhash2((u32 *)&elem->ip, 4, initval) % hashsize;
+}
+
+static inline bool
+ip4_cmp(struct ip4_elem *ip1, struct ip4_elem *ip2)
+{
+       return ip1->ip == ip2->ip;
+}
+
+/* Like ip4_cmp(): true when the two addresses are equal.
+ * ipv6_addr_cmp() follows the memcmp() convention and returns zero
+ * on equality, so its result must be tested explicitly. */
+static inline bool
+ip6_cmp(struct ip6_elem *ip1, struct ip6_elem *ip2)
+{
+       return ipv6_addr_cmp(&ip1->ip.in6, &ip2->ip.in6) == 0;
+}
+
+static inline bool
+ip4_null(struct ip4_elem *elem)
+{
+       return elem->ip == 0;
+}
+
+static inline bool
+ip6_null(struct ip6_elem *elem)
+{
+       return ipv6_addr_any(&elem->ip.in6);
 }
 
-static inline int
-iphash_test(struct ip_set *set, ip_set_ip_t ip)
+static inline void
+ip4_cpy(struct ip4_elem *dst, const struct ip4_elem *src)
 {
-       return (ip && iphash_id(set, ip) != UINT_MAX);
+       dst->ip = src->ip;
 }
 
-#define KADT_CONDITION
+static inline void
+ip6_cpy(struct ip6_elem *dst, const struct ip6_elem *src)
+{
+       ipv6_addr_copy(&dst->ip.in6, &src->ip.in6);
+}
 
-UADT(iphash, test)
-KADT(iphash, test, ipaddr)
+/* Zero valued IP addresses (network order) cannot be stored */
+static inline void
+ip4_zero_out(struct ip4_elem *elem)
+{
+       elem->ip = 0;
+}
 
-static inline int
-__iphash_add(struct ip_set_iphash *map, ip_set_ip_t *ip)
+static inline void
+ip6_zero_out(struct ip6_elem *elem)
 {
-       __u32 probe;
-       u_int16_t i;
-       ip_set_ip_t *elem, *slot = NULL;
+       ipv6_addr_set(&elem->ip.in6, 0, 0, 0, 0);
+}
+
+/* Mask the address in place with the netmask of the given prefix length,
+ * one 32 bit word at a time */
+static inline void
+ip6_netmask(union nf_inet_addr *ip, uint8_t prefix)
+{
+       unsigned int word;
+
+       for (word = 0; word < 4; word++)
+               ip->ip6[word] &= NETMASK6(prefix)[word];
+}
+
+/* The type variant functions: generic ones */
+
+static void
+hash_ip_destroy(struct ip_set *set)
+{
+       struct hash_ip *map = set->data;
+
+       /* gc might be running: del_timer_sync can't be used */
+       if (set->flags & IP_SET_FLAG_TIMEOUT)
+               while (!del_timer(&map->gc))
+                       msleep(IPSET_DESTROY_TIMER_SLEEP);
+
+       ip_set_free(map->members, set->flags);
+       kfree(map);
        
-       for (i = 0; i < map->probes; i++) {
-               probe = jhash_ip(map, i, *ip) % map->hashsize;
-               elem = HARRAY_ELEM(map->members, ip_set_ip_t *, probe);
-               if (*elem == *ip)
-                       return -EEXIST;
-               if (!(slot || *elem))
-                       slot = elem;
-               /* There can be deleted entries, must check all slots */
-       }
-       if (slot) {
-               *slot = *ip;
-               map->elements++;
-               return 0;
-       }
-       /* Trigger rehashing */
-       return -EAGAIN;
+       set->data = NULL;
 }
 
-static inline int
-iphash_add(struct ip_set *set, ip_set_ip_t ip)
+#define hash_ip4_destroy       hash_ip_destroy
+#define hash_ip6_destroy       hash_ip_destroy
+
+static void
+hash_ip_flush(struct ip_set *set)
 {
-       struct ip_set_iphash *map = set->data;
+       struct hash_ip *map = set->data;
        
-       if (!ip || map->elements >= limit)
-               return -ERANGE;
+       memset(map->members, 0, map->hashsize * map->elem_size);
+       map->elements = 0;
+}
+
+#define hash_ip4_flush         hash_ip_flush
+#define hash_ip6_flush         hash_ip_flush
+
+/* IPv4 variant */
 
-       ip &= map->netmask;
-       return __iphash_add(map, &ip);
+#define PF     4
+#include "ip_set_hash_ip_src.c"
+#undef PF
+
+static int
+hash_ip4_kadt(struct ip_set *set, const struct sk_buff *skb,
+             enum ipset_adt adt, uint8_t pf, const uint8_t *flags)
+{
+       struct hash_ip *map = set->data;
+       bool with_timeout = set->flags & IP_SET_FLAG_TIMEOUT;
+       uint32_t ip;
+       
+       if (pf != AF_INET)
+               return -EINVAL;
+
+       ip4addrptr(skb, flags, &ip);
+       ip &= NETMASK(map->netmask);
+       if (ip == 0)
+               return -EINVAL;
+
+       switch (adt) {
+       case IPSET_TEST:
+               return hash_ip4_test(map, with_timeout,
+                                    (struct ip4_elem *)&ip);
+       case IPSET_ADD:
+               return hash_ip4_add(map, with_timeout,
+                                   (struct ip4_elem *)&ip, map->timeout);
+       case IPSET_DEL:
+               return hash_ip4_del(map, with_timeout, (struct ip4_elem *)&ip);
+       default:
+               BUG();
+       }
+       return 0;
 }
 
-UADT(iphash, add)
-KADT(iphash, add, ipaddr)
+static const struct nla_policy
+hash_ip4_adt_policy[IPSET_ATTR_ADT_MAX + 1] __read_mostly = {
+       [IPSET_ATTR_IP]         = { .type = NLA_U32 },
+       [IPSET_ATTR_TIMEOUT]    = { .type = NLA_U32 },
+};
 
-static inline void
-__iphash_retry(struct ip_set_iphash *tmp, struct ip_set_iphash *map)
+/* Add, delete or test an IPv4 address from userspace.
+ *
+ * IPSET_ATTR_IP is mandatory; the address is masked with the netmask
+ * of the set and an all-zero result is rejected with
+ * -IPSET_ERR_HASH_ELEM. IPSET_ATTR_TIMEOUT is accepted only for sets
+ * created with timeout support (-IPSET_ERR_TIMEOUT otherwise).
+ * Returns the result of the requested operation, or
+ * -IPSET_ERR_PROTOCOL on malformed or missing attributes. */
+static int
+hash_ip4_uadt(struct ip_set *set, struct nlattr *head, int len,
+             enum ipset_adt adt, uint32_t *lineno, uint32_t flags)
 {
-       tmp->netmask = map->netmask;
+       struct hash_ip *map = set->data;
+       /* nla_parse() fills in tb[0..maxtype]: maxtype + 1 slots needed */
+       struct nlattr *tb[IPSET_ATTR_ADT_MAX + 1];
+       bool with_timeout = set->flags & IP_SET_FLAG_TIMEOUT;
+       uint32_t ip, timeout = map->timeout;
+
+       if (nla_parse(tb, IPSET_ATTR_ADT_MAX, head, len,
+                     hash_ip4_adt_policy))
+               return -IPSET_ERR_PROTOCOL;
+
+       if (tb[IPSET_ATTR_IP])
+               ip = ip_set_get_n32(tb[IPSET_ATTR_IP]);
+       else
+               return -IPSET_ERR_PROTOCOL;
+
+       /* Zero valued addresses cannot be stored */
+       ip &= NETMASK(map->netmask);
+       if (ip == 0)
+               return -IPSET_ERR_HASH_ELEM;
+
+       if (tb[IPSET_ATTR_TIMEOUT]) {
+               if (!with_timeout)
+                       return -IPSET_ERR_TIMEOUT;
+               timeout = ip_set_get_h32(tb[IPSET_ATTR_TIMEOUT]);
+       }
+
+       switch (adt) {
+       case IPSET_TEST:
+               return hash_ip4_test(map, with_timeout,
+                                    (struct ip4_elem *)&ip);
+       case IPSET_ADD:
+               return hash_ip4_add(map, with_timeout,
+                                   (struct ip4_elem *)&ip, timeout);
+       case IPSET_DEL:
+               return hash_ip4_del(map, with_timeout,
+                                   (struct ip4_elem *)&ip);
+       default:
+               BUG();
+       }
+
+       return 0;
 }
 
-HASH_RETRY(iphash, ip_set_ip_t)
+/* IPv6 variants */
+
+#define PF     6
+#include "ip_set_hash_ip_src.c"
+#undef PF
 
-static inline int
-iphash_del(struct ip_set *set, ip_set_ip_t ip)
+static int
+hash_ip6_kadt(struct ip_set *set, const struct sk_buff *skb,
+             enum ipset_adt adt, uint8_t pf, const uint8_t *flags)
 {
-       struct ip_set_iphash *map = set->data;
-       ip_set_ip_t id, *elem;
+       struct hash_ip *map = set->data;
+       bool with_timeout = set->flags & IP_SET_FLAG_TIMEOUT;
+       union nf_inet_addr ip;
 
-       if (!ip)
-               return -ERANGE;
+       if (pf != AF_INET6)
+               return -EINVAL;
 
-       id = iphash_id(set, ip);
-       if (id == UINT_MAX)
-               return -EEXIST;
-               
-       elem = HARRAY_ELEM(map->members, ip_set_ip_t *, id);
-       *elem = 0;
-       map->elements--;
+       ip6addrptr(skb, flags, &ip.in6);
+       ip6_netmask(&ip, map->netmask);
+       if (ipv6_addr_any(&ip.in6))
+               return -EINVAL;
 
+       switch (adt) {
+       case IPSET_TEST:
+               return hash_ip6_test(map, with_timeout,
+                                    (struct ip6_elem *)&ip);
+       case IPSET_ADD:
+               return hash_ip6_add(map, with_timeout,
+                                   (struct ip6_elem *)&ip, map->timeout);
+       case IPSET_DEL:
+               return hash_ip6_del(map, with_timeout,
+                                   (struct ip6_elem *)&ip);
+       default:
+               BUG();
+       }
        return 0;
 }
 
-UADT(iphash, del)
-KADT(iphash, del, ipaddr)
+static const struct nla_policy
+hash_ip6_adt_policy[IPSET_ATTR_ADT_MAX + 1] __read_mostly = {
+       [IPSET_ATTR_IP]         = { .type = NLA_BINARY,
+                                   .len = sizeof(struct in6_addr) },
+       [IPSET_ATTR_TIMEOUT]    = { .type = NLA_U32 },
+};
 
-static inline int
-__iphash_create(const struct ip_set_req_iphash_create *req,
-               struct ip_set_iphash *map)
+/* Add, delete or test an IPv6 address from userspace.
+ *
+ * IPSET_ATTR_IP is mandatory; the address is masked in place (inside
+ * the netlink attribute payload) with the netmask of the set and an
+ * all-zero result is rejected with -IPSET_ERR_HASH_ELEM.
+ * IPSET_ATTR_TIMEOUT is accepted only for sets created with timeout
+ * support (-IPSET_ERR_TIMEOUT otherwise).
+ * Returns the result of the requested operation, or
+ * -IPSET_ERR_PROTOCOL on malformed or missing attributes. */
+static int
+hash_ip6_uadt(struct ip_set *set, struct nlattr *head, int len,
+             enum ipset_adt adt, uint32_t *lineno, uint32_t flags)
 {
-       map->netmask = req->netmask;
+       struct hash_ip *map = set->data;
+       /* nla_parse() fills in tb[0..maxtype]: maxtype + 1 slots needed */
+       struct nlattr *tb[IPSET_ATTR_ADT_MAX + 1];
+       union nf_inet_addr *ip;
+       bool with_timeout = set->flags & IP_SET_FLAG_TIMEOUT;
+       uint32_t timeout = map->timeout;
+
+       if (nla_parse(tb, IPSET_ATTR_ADT_MAX, head, len,
+                     hash_ip6_adt_policy))
+               return -IPSET_ERR_PROTOCOL;
+
+       if (tb[IPSET_ATTR_IP])
+               ip = nla_data(tb[IPSET_ATTR_IP]);
+       else
+               return -IPSET_ERR_PROTOCOL;
+
+       /* Zero valued addresses cannot be stored */
+       ip6_netmask(ip, map->netmask);
+       if (ipv6_addr_any(&ip->in6))
+               return -IPSET_ERR_HASH_ELEM;
+
+       if (tb[IPSET_ATTR_TIMEOUT]) {
+               if (!with_timeout)
+                       return -IPSET_ERR_TIMEOUT;
+               timeout = ip_set_get_h32(tb[IPSET_ATTR_TIMEOUT]);
+       }
+
+       switch (adt) {
+       case IPSET_TEST:
+               return hash_ip6_test(map, with_timeout,
+                                    (struct ip6_elem *)ip);
+       case IPSET_ADD:
+               return hash_ip6_add(map, with_timeout,
+                                   (struct ip6_elem *)ip, timeout);
+       case IPSET_DEL:
+               return hash_ip6_del(map, with_timeout,
+                                   (struct ip6_elem *)ip);
+       default:
+               BUG();
+       }
        
        return 0;
 }
 
-HASH_CREATE(iphash, ip_set_ip_t)
-HASH_DESTROY(iphash)
+/* Create hash:ip type of sets */
 
-HASH_FLUSH(iphash, ip_set_ip_t)
+static const struct nla_policy
+hash_ip_create_policy[IPSET_ATTR_CREATE_MAX+1] __read_mostly = {
+       [IPSET_ATTR_HASHSIZE]   = { .type = NLA_U32 },
+       [IPSET_ATTR_MAXELEM]    = { .type = NLA_U32 },
+       [IPSET_ATTR_PROBES]     = { .type = NLA_U8 },
+       [IPSET_ATTR_RESIZE]     = { .type = NLA_U8  },
+       [IPSET_ATTR_TIMEOUT]    = { .type = NLA_U32 },
+};
 
-static inline void
-__iphash_list_header(const struct ip_set_iphash *map,
-                    struct ip_set_req_iphash_create *header)
-{    
-       header->netmask = map->netmask;
+static bool
+init_map_ip(struct ip_set *set, struct hash_ip *map, uint32_t maxelem,
+           uint32_t probes, uint32_t resize, uint8_t netmask, uint8_t family)
+{
+       map->members = ip_set_alloc(map->hashsize * map->elem_size,
+                                   GFP_KERNEL, &set->flags);
+       if (!map->members)
+               return false;
+
+       map->maxelem = maxelem;
+       map->probes = probes;
+       map->resize = resize;
+       map->netmask = netmask;
+
+       set->data = map;
+       set->family = family;
+       
+       return true;
 }
 
-HASH_LIST_HEADER(iphash)
-HASH_LIST_MEMBERS_SIZE(iphash, ip_set_ip_t)
-HASH_LIST_MEMBERS(iphash, ip_set_ip_t)
+/* Create a hash:ip set from the netlink attributes in head.
+ *
+ * Hash size, maximal number of elements, probes and resize factor fall
+ * back to the IPSET_DEFAULT_* values when not supplied; the family
+ * defaults to AF_INET and the netmask to the full address length.
+ * IPSET_ATTR_TIMEOUT selects the element layout with timeout and
+ * starts the garbage collector.
+ * Returns 0 on success, -IPSET_ERR_PROTOCOL on malformed attributes,
+ * -IPSET_ERR_INVALID_FAMILY / -IPSET_ERR_INVALID_NETMASK on bad
+ * parameters and -ENOMEM when an allocation fails. */
+static int
+hash_ip_create(struct ip_set *set, struct nlattr *head, int len,
+                uint32_t flags)
+{
+       /* nla_parse() fills in tb[0..maxtype]: maxtype + 1 slots needed */
+       struct nlattr *tb[IPSET_ATTR_CREATE_MAX+1];
+       uint32_t hashsize, maxelem;
+       uint8_t probes, resize, netmask, family, i;
+       struct hash_ip *map;
 
-IP_SET_RTYPE(iphash, IPSET_TYPE_IP | IPSET_DATA_SINGLE)
+       if (nla_parse(tb, IPSET_ATTR_CREATE_MAX, head, len,
+                     hash_ip_create_policy))
+               return -IPSET_ERR_PROTOCOL;
 
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
-MODULE_DESCRIPTION("iphash type of IP sets");
-module_param(limit, int, 0600);
-MODULE_PARM_DESC(limit, "maximal number of elements stored in the sets");
+       hashsize = IPSET_DEFAULT_HASHSIZE;
+       maxelem = IPSET_DEFAULT_MAXELEM;
+       probes = IPSET_DEFAULT_PROBES;
+       resize = IPSET_DEFAULT_RESIZE;
+       family = AF_INET;
+
+       if (tb[IPSET_ATTR_HASHSIZE])
+               hashsize = ip_set_get_h32(tb[IPSET_ATTR_HASHSIZE]);
+
+       if (tb[IPSET_ATTR_MAXELEM])
+               maxelem = ip_set_get_h32(tb[IPSET_ATTR_MAXELEM]);
+
+       if (tb[IPSET_ATTR_PROBES])
+               probes = nla_get_u8(tb[IPSET_ATTR_PROBES]);
+
+       if (tb[IPSET_ATTR_RESIZE])
+               resize = nla_get_u8(tb[IPSET_ATTR_RESIZE]);
+
+       if (tb[IPSET_ATTR_FAMILY])
+               family = nla_get_u8(tb[IPSET_ATTR_FAMILY]);
+       if (!(family == AF_INET || family == AF_INET6))
+               return -IPSET_ERR_INVALID_FAMILY;
+       netmask = family == AF_INET ? 32 : 128;
+
+       if (tb[IPSET_ATTR_NETMASK]) {
+               netmask = nla_get_u8(tb[IPSET_ATTR_NETMASK]);
+
+               if ((family == AF_INET && netmask > 32)
+                   || (family == AF_INET6 && netmask > 128))
+                       return -IPSET_ERR_INVALID_NETMASK;
+       }
+
+       /* One initval per probe is stored right behind the structure */
+       map = kzalloc(sizeof(*map) + probes * sizeof(initval_t), GFP_KERNEL);
+       if (!map)
+               return -ENOMEM;
+
+       map->hashsize = hashsize;
+       if (tb[IPSET_ATTR_TIMEOUT]) {
+               map->elem_size = family == AF_INET
+                                       ? sizeof(struct ip4_elem_timeout)
+                                       : sizeof(struct ip6_elem_timeout);
+
+               if (!init_map_ip(set, map, maxelem, probes, resize, netmask,
+                                family)) {
+                       kfree(map);
+                       return -ENOMEM;
+               }
+
+               map->timeout = ip_set_get_h32(tb[IPSET_ATTR_TIMEOUT]);
+               set->flags |= IP_SET_FLAG_TIMEOUT;
+
+               if (family == AF_INET)
+                       hash_ip4_gc_init(set);
+               else
+                       hash_ip6_gc_init(set);
+       } else {
+               map->elem_size = family == AF_INET
+                                       ? sizeof(struct ip4_elem)
+                                       : sizeof(struct ip6_elem);
+
+               if (!init_map_ip(set, map, maxelem, probes, resize, netmask,
+                                family)) {
+                       kfree(map);
+                       return -ENOMEM;
+               }
+       }
+       for (i = 0; i < map->probes; i++)
+               get_random_bytes(((initval_t *) map->initval)+i,
+                                sizeof(initval_t));
+
+       set->variant = family == AF_INET ? &hash_ip4 : &hash_ip6;
+       D("create %s hashsize %u maxelem %u probes %u resize %u",
+          set->name, map->hashsize, map->maxelem, map->probes, map->resize);
+
+       return 0;
+}
+
+/*
+ * Type descriptor of the "hash:ip" set type. It is registered by
+ * hash_ip_init() and unregistered by hash_ip_fini(); sets of this type
+ * are created through hash_ip_create(). The family is left unspecified
+ * here because hash_ip_create() accepts both AF_INET and AF_INET6.
+ */
+static struct ip_set_type hash_ip_type = {
+       /* identity */
+       .name           = "hash:ip",
+       .revision       = 0,
+       .protocol       = IPSET_PROTOCOL,
+       /* capabilities */
+       .family         = AF_UNSPEC,
+       .features       = IPSET_TYPE_IP,
+       /* hooks */
+       .create         = hash_ip_create,
+       .me             = THIS_MODULE,
+};
+
+/*
+ * Module init hook: register the hash:ip set type and pass on the result
+ * of ip_set_type_register() unchanged.
+ */
+static int __init
+hash_ip_init(void)
+{
+       return ip_set_type_register(&hash_ip_type);
+}
+
+/* Module exit hook: unregister the hash:ip set type again. */
+static void __exit
+hash_ip_fini(void)
+{
+       ip_set_type_unregister(&hash_ip_type);
+}
 
-REGISTER_MODULE(iphash)
+/* Wire the type registration/unregistration to module load and unload. */
+module_init(hash_ip_init);
+module_exit(hash_ip_fini);