patch-2.3.99-pre2 linux/net/ipv4/netfilter/ipchains_core.c

Next file: linux/net/ipv4/netfilter/ipfwadm_core.c
Previous file: linux/net/ipv4/netfilter/ip_tables.c
Back to the patch index
Back to the overall index

diff -u --recursive --new-file v2.3.99-pre1/linux/net/ipv4/netfilter/ipchains_core.c linux/net/ipv4/netfilter/ipchains_core.c
@@ -0,0 +1,1768 @@
+/* Minor modifications to fit on compatibility framework:
+   Rusty.Russell@rustcorp.com.au
+*/
+
+/*
+ * This code is heavily based on the code on the old ip_fw.c code; see below for
+ * copyrights and attributions of the old code.  This code is basically GPL.
+ *
+ * 15-Aug-1997: Major changes to allow graphs for firewall rules.
+ *              Paul Russell <Paul.Russell@rustcorp.com.au> and
+ *		Michael Neuling <Michael.Neuling@rustcorp.com.au>
+ * 24-Aug-1997: Generalised protocol handling (not just TCP/UDP/ICMP).
+ *              Added explicit RETURN from chains.
+ *              Removed TOS mangling (done in ipchains 1.0.1).
+ *              Fixed read & reset bug by reworking proc handling.
+ *              Paul Russell <Paul.Russell@rustcorp.com.au>
+ * 28-Sep-1997: Added packet marking for net sched code.
+ *              Removed fw_via comparisons: all done on device name now,
+ *              similar to changes in ip_fw.c in DaveM's CVS970924 tree.
+ *              Paul Russell <Paul.Russell@rustcorp.com.au>
+ * 2-Nov-1997:  Moved types across to __u16, etc.
+ *              Added inverse flags.
+ *              Fixed fragment bug (in args to port_match).
+ *              Changed mark to only one flag (MARKABS).
+ * 21-Nov-1997: Added ability to test ICMP code.
+ * 19-Jan-1998: Added wildcard interfaces.
+ * 6-Feb-1998:  Merged 2.0 and 2.1 versions.
+ *              Initialised ip_masq for 2.0.x version.
+ *              Added explicit NETLINK option for 2.1.x version.
+ *              Added packet and byte counters for policy matches.
+ * 26-Feb-1998: Fixed race conditions, added SMP support.
+ * 18-Mar-1998: Fix SMP, fix race condition fix.
+ * 1-May-1998:  Remove caching of device pointer.
+ * 12-May-1998: Allow tiny fragment case for TCP/UDP.
+ * 15-May-1998: Treat short packets as fragments, don't just block.
+ * 3-Jan-1999:  Fixed serious procfs security hole -- users should never
+ *              be allowed to view the chains!
+ *              Marc Santoro <ultima@snicker.emoti.com>
+ * 29-Jan-1999: Locally generated bogus IPs dealt with, rather than crash
+ *              during dump_packet. --RR.
+ * 19-May-1999: Star Wars: The Phantom Menace opened.  Rule num
+ *		printed in log (modified from Michael Hasenstein's patch).
+ *		Added SYN in log message. --RR
+ * 23-Jul-1999: Fixed small fragment security exposure opened on 15-May-1998.
+ *              John McDonald <jm@dataprotect.com>
+ *              Thomas Lopatic <tl@dataprotect.com>
+ */
+
+/*
+ *
+ * The origina Linux port was done Alan Cox, with changes/fixes from
+ * Pauline Middlelink, Jos Vos, Thomas Quinot, Wouter Gadeyne, Juan
+ * Jose Ciarlante, Bernd Eckenfels, Keith Owens and others.
+ *
+ * Copyright from the original FreeBSD version follows:
+ *
+ * Copyright (c) 1993 Daniel Boulet
+ * Copyright (c) 1994 Ugen J.S.Antsilevich
+ *
+ * Redistribution and use in source forms, with and without modification,
+ * are permitted provided that this entire comment appears intact.
+ *
+ * Redistribution in binary form may occur without any restrictions.
+ * Obviously, it would be nice if you gave credit where credit is due
+ * but requiring it would be too onerous.
+ *
+ * This software is provided ``AS IS'' without any warranties of any kind.  */
+
+#include <linux/config.h>
+
+#include <asm/uaccess.h>
+#include <asm/system.h>
+#include <linux/types.h>
+#include <linux/sched.h>
+#include <linux/string.h>
+#include <linux/errno.h>
+
+#include <linux/socket.h>
+#include <linux/sockios.h>
+#include <linux/in.h>
+#include <linux/inet.h>
+#include <linux/netdevice.h>
+#include <linux/icmp.h>
+#include <linux/udp.h>
+#include <net/ip.h>
+#include <net/protocol.h>
+#include <net/route.h>
+#include <net/tcp.h>
+#include <net/udp.h>
+#include <net/sock.h>
+#include <net/icmp.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter_ipv4/compat_firewall.h>
+#include <linux/netfilter_ipv4/ipchains_core.h>
+
+#include <net/checksum.h>
+#include <linux/proc_fs.h>
+#include <linux/stat.h>
+
+/* Understanding locking in this code: (thanks to Alan Cox for using
+ * little words to explain this to me). -- PR
+ *
+ * In UP, there can be two packets traversing the chains:
+ * 1) A packet from the current userspace context
+ * 2) A packet off the bh handlers (timer or net).
+ *
+ * For SMP (kernel v2.1+), multiply this by # CPUs.
+ *
+ * [Note that this in not correct for 2.2 - because the socket code always
+ *  uses lock_kernel() to serialize, and bottom halves (timers and net_bhs)
+ *  only run on one CPU at a time.  This will probably change for 2.3.
+ *  It is still good to use spinlocks because that avoids the global cli()
+ *  for updating the tables, which is rather costly in SMP kernels -AK]
+ *
+ * This means counters and backchains can get corrupted if no precautions
+ * are taken.
+ *
+ * To actually alter a chain on UP, we need only do a cli(), as this will
+ * stop a bh handler firing, as we are in the current userspace context
+ * (coming from a setsockopt()).
+ *
+ * On SMP, we need a write_lock_irqsave(), which is a simple cli() in
+ * UP.
+ *
+ * For backchains and counters, we use an array, indexed by
+ * [cpu_number_map[smp_processor_id()]*2 + !in_interrupt()]; the array is of
+ * size [smp_num_cpus*2].  For v2.0, smp_num_cpus is effectively 1.  So,
+ * confident of uniqueness, we modify counters even though we only
+ * have a read lock (to read the counters, you need a write lock,
+ * though).  */
+
+/* Why I didn't use straight locking... -- PR
+ *
+ * The backchains can be separated out of the ip_chains structure, and
+ * allocated as needed inside ip_fw_check().
+ *
+ * The counters, however, can't.  Trying to lock these means blocking
+ * interrupts every time we want to access them.  This would suck HARD
+ * performance-wise.  Not locking them leads to possible corruption,
+ * made worse on 32-bit machines (counters are 64-bit).  */
+
+/*#define DEBUG_IP_FIREWALL*/
+/*#define DEBUG_ALLOW_ALL*/ /* Useful for remote debugging */
+/*#define DEBUG_IP_FIREWALL_USER*/
+/*#define DEBUG_IP_FIREWALL_LOCKING*/
+
+static struct sock *ipfwsk;
+
+#ifdef CONFIG_SMP
+#define SLOT_NUMBER() (cpu_number_map(smp_processor_id())*2 + !in_interrupt())
+#else /* !SMP */
+#define SLOT_NUMBER() (!in_interrupt())
+#endif /* CONFIG_SMP */
+#define NUM_SLOTS (smp_num_cpus*2)
+
+#define SIZEOF_STRUCT_IP_CHAIN (sizeof(struct ip_chain) \
+				+ NUM_SLOTS*sizeof(struct ip_reent))
+#define SIZEOF_STRUCT_IP_FW_KERNEL (sizeof(struct ip_fwkernel) \
+				    + NUM_SLOTS*sizeof(struct ip_counters))
+
+#ifdef DEBUG_IP_FIREWALL_LOCKING
+static unsigned int fwc_rlocks, fwc_wlocks;
+#define FWC_DEBUG_LOCK(d)			\
+do {						\
+	FWC_DONT_HAVE_LOCK(d);			\
+	d |= (1 << SLOT_NUMBER());		\
+} while (0)
+
+#define FWC_DEBUG_UNLOCK(d)			\
+do {						\
+	FWC_HAVE_LOCK(d);			\
+	d &= ~(1 << SLOT_NUMBER());		\
+} while (0)
+
+#define FWC_DONT_HAVE_LOCK(d)					\
+do {								\
+	if ((d) & (1 << SLOT_NUMBER()))				\
+		printk("%s:%i: Got lock on %i already!\n", 	\
+		       __FILE__, __LINE__, SLOT_NUMBER());	\
+} while(0)
+
+#define FWC_HAVE_LOCK(d)				\
+do {							\
+	if (!((d) & (1 << SLOT_NUMBER())))		\
+	printk("%s:%i:No lock on %i!\n", 		\
+	       __FILE__, __LINE__, SLOT_NUMBER());	\
+} while (0)
+
+#else
+#define FWC_DEBUG_LOCK(d) do { } while(0)
+#define FWC_DEBUG_UNLOCK(d) do { } while(0)
+#define FWC_DONT_HAVE_LOCK(d) do { } while(0)
+#define FWC_HAVE_LOCK(d) do { } while(0)
+#endif /*DEBUG_IP_FIRWALL_LOCKING*/
+
+#define FWC_READ_LOCK(l) do { FWC_DEBUG_LOCK(fwc_rlocks); read_lock(l); } while (0)
+#define FWC_WRITE_LOCK(l) do { FWC_DEBUG_LOCK(fwc_wlocks); write_lock(l); } while (0)
+#define FWC_READ_LOCK_IRQ(l,f) do { FWC_DEBUG_LOCK(fwc_rlocks); read_lock_irqsave(l,f); } while (0)
+#define FWC_WRITE_LOCK_IRQ(l,f) do { FWC_DEBUG_LOCK(fwc_wlocks); write_lock_irqsave(l,f); } while (0)
+#define FWC_READ_UNLOCK(l) do { FWC_DEBUG_UNLOCK(fwc_rlocks); read_unlock(l); } while (0)
+#define FWC_WRITE_UNLOCK(l) do { FWC_DEBUG_UNLOCK(fwc_wlocks); write_unlock(l); } while (0)
+#define FWC_READ_UNLOCK_IRQ(l,f) do { FWC_DEBUG_UNLOCK(fwc_rlocks); read_unlock_irqrestore(l,f); } while (0)
+#define FWC_WRITE_UNLOCK_IRQ(l,f) do { FWC_DEBUG_UNLOCK(fwc_wlocks); write_unlock_irqrestore(l,f); } while (0)
+
+struct ip_chain;
+
+struct ip_counters
+{
+	__u64 pcnt, bcnt;			/* Packet and byte counters */
+};
+
+struct ip_fwkernel
+{
+	struct ip_fw ipfw;
+	struct ip_fwkernel *next;	/* where to go next if current
+					 * rule doesn't match */
+	struct ip_chain *branch;	/* which branch to jump to if
+					 * current rule matches */
+	int simplebranch;		/* Use this if branch == NULL */
+	struct ip_counters counters[0]; /* Actually several of these */
+};
+
+struct ip_reent
+{
+	struct ip_chain *prevchain;	/* Pointer to referencing chain */
+	struct ip_fwkernel *prevrule;	/* Pointer to referencing rule */
+	struct ip_counters counters;
+};
+
+struct ip_chain
+{
+	ip_chainlabel label;	    /* Defines the label for each block */
+ 	struct ip_chain *next;	    /* Pointer to next block */
+	struct ip_fwkernel *chain;  /* Pointer to first rule in block */
+	__u32 refcount; 	    /* Number of refernces to block */
+	int policy;		    /* Default rule for chain.  Only *
+				     * used in built in chains */
+	struct ip_reent reent[0];   /* Actually several of these */
+};
+
+/*
+ *	Implement IP packet firewall
+ */
+
+#ifdef DEBUG_IP_FIREWALL
+#define dprintf(format, args...)  printk(format , ## args)
+#else
+#define dprintf(format, args...)
+#endif
+
+#ifdef DEBUG_IP_FIREWALL_USER
+#define duprintf(format, args...) printk(format , ## args)
+#else
+#define duprintf(format, args...)
+#endif
+
+/* Lock around ip_fw_chains linked list structure */
+rwlock_t ip_fw_lock = RW_LOCK_UNLOCKED;
+
+/* Head of linked list of fw rules */
+static struct ip_chain *ip_fw_chains;
+
+#define IP_FW_INPUT_CHAIN ip_fw_chains
+#define IP_FW_FORWARD_CHAIN (ip_fw_chains->next)
+#define IP_FW_OUTPUT_CHAIN (ip_fw_chains->next->next)
+
+/* Returns 1 if the port is matched by the range, 0 otherwise */
+extern inline int port_match(__u16 min, __u16 max, __u16 port,
+			     int frag, int invert)
+{
+	if (frag) /* Fragments fail ANY port test. */
+		return (min == 0 && max == 0xFFFF);
+	else return (port >= min && port <= max) ^ invert;
+}
+
+/* Returns whether matches rule or not. */
+static int ip_rule_match(struct ip_fwkernel *f,
+			 const char *ifname,
+			 struct iphdr *ip,
+			 char tcpsyn,
+			 __u16 src_port, __u16 dst_port,
+			 char isfrag)
+{
+#define FWINV(bool,invflg) ((bool) ^ !!(f->ipfw.fw_invflg & invflg))
+	/*
+	 *	This is a bit simpler as we don't have to walk
+	 *	an interface chain as you do in BSD - same logic
+	 *	however.
+	 */
+
+	if (FWINV((ip->saddr&f->ipfw.fw_smsk.s_addr) != f->ipfw.fw_src.s_addr,
+		  IP_FW_INV_SRCIP)
+	    || FWINV((ip->daddr&f->ipfw.fw_dmsk.s_addr)!=f->ipfw.fw_dst.s_addr,
+		     IP_FW_INV_DSTIP)) {
+		dprintf("Source or dest mismatch.\n");
+
+		dprintf("SRC: %u. Mask: %u. Target: %u.%s\n", ip->saddr,
+			f->ipfw.fw_smsk.s_addr, f->ipfw.fw_src.s_addr,
+			f->ipfw.fw_invflg & IP_FW_INV_SRCIP ? " (INV)" : "");
+		dprintf("DST: %u. Mask: %u. Target: %u.%s\n", ip->daddr,
+			f->ipfw.fw_dmsk.s_addr, f->ipfw.fw_dst.s_addr,
+			f->ipfw.fw_invflg & IP_FW_INV_DSTIP ? " (INV)" : "");
+		return 0;
+	}
+
+	/*
+	 *	Look for a VIA device match
+	 */
+	if (f->ipfw.fw_flg & IP_FW_F_WILDIF) {
+	    if (FWINV(strncmp(ifname, f->ipfw.fw_vianame,
+			      strlen(f->ipfw.fw_vianame)) != 0,
+		      IP_FW_INV_VIA)) {
+		dprintf("Wildcard interface mismatch.%s\n",
+			f->ipfw.fw_invflg & IP_FW_INV_VIA ? " (INV)" : "");
+		return 0;	/* Mismatch */
+	    }
+	}
+	else if (FWINV(strcmp(ifname, f->ipfw.fw_vianame) != 0,
+		       IP_FW_INV_VIA)) {
+	    dprintf("Interface name does not match.%s\n",
+		    f->ipfw.fw_invflg & IP_FW_INV_VIA
+		    ? " (INV)" : "");
+	    return 0;	/* Mismatch */
+	}
+
+	/*
+	 *	Ok the chain addresses match.
+	 */
+
+	/* If we have a fragment rule but the packet is not a fragment
+	 * the we return zero */
+	if (FWINV((f->ipfw.fw_flg&IP_FW_F_FRAG) && !isfrag, IP_FW_INV_FRAG)) {
+		dprintf("Fragment rule but not fragment.%s\n",
+			f->ipfw.fw_invflg & IP_FW_INV_FRAG ? " (INV)" : "");
+		return 0;
+	}
+
+	/* Fragment NEVER passes a SYN test, even an inverted one. */
+	if (FWINV((f->ipfw.fw_flg&IP_FW_F_TCPSYN) && !tcpsyn, IP_FW_INV_SYN)
+	    || (isfrag && (f->ipfw.fw_flg&IP_FW_F_TCPSYN))) {
+		dprintf("Rule requires SYN and packet has no SYN.%s\n",
+			f->ipfw.fw_invflg & IP_FW_INV_SYN ? " (INV)" : "");
+		return 0;
+	}
+
+	if (f->ipfw.fw_proto) {
+		/*
+		 *	Specific firewall - packet's protocol
+		 *	must match firewall's.
+		 */
+
+		if (FWINV(ip->protocol!=f->ipfw.fw_proto, IP_FW_INV_PROTO)) {
+			dprintf("Packet protocol %hi does not match %hi.%s\n",
+				ip->protocol, f->ipfw.fw_proto,
+				f->ipfw.fw_invflg&IP_FW_INV_PROTO ? " (INV)":"");
+			return 0;
+		}
+
+		/* For non TCP/UDP/ICMP, port range is max anyway. */
+		if (!port_match(f->ipfw.fw_spts[0],
+				f->ipfw.fw_spts[1],
+				src_port, isfrag,
+				!!(f->ipfw.fw_invflg&IP_FW_INV_SRCPT))
+		    || !port_match(f->ipfw.fw_dpts[0],
+				   f->ipfw.fw_dpts[1],
+				   dst_port, isfrag,
+				   !!(f->ipfw.fw_invflg
+				      &IP_FW_INV_DSTPT))) {
+		    dprintf("Port match failed.\n");
+		    return 0;
+		}
+	}
+
+	dprintf("Match succeeded.\n");
+	return 1;
+}
+
+static const char *branchname(struct ip_chain *branch,int simplebranch)
+{
+	if (branch)
+		return branch->label;
+	switch (simplebranch)
+	{
+	case FW_BLOCK: return IP_FW_LABEL_BLOCK;
+	case FW_ACCEPT: return IP_FW_LABEL_ACCEPT;
+	case FW_REJECT: return IP_FW_LABEL_REJECT;
+	case FW_REDIRECT: return IP_FW_LABEL_REDIRECT;
+	case FW_MASQUERADE: return IP_FW_LABEL_MASQUERADE;
+	case FW_SKIP: return "-";
+	case FW_SKIP+1: return IP_FW_LABEL_RETURN;
+	default:
+		return "UNKNOWN";
+	}
+}
+
+/*
+ * VERY ugly piece of code which actually
+ * makes kernel printf for matching packets...
+ */
+static void dump_packet(const struct iphdr *ip,
+			const char *ifname,
+			struct ip_fwkernel *f,
+			const ip_chainlabel chainlabel,
+			__u16 src_port,
+			__u16 dst_port,
+			unsigned int count,
+			int syn)
+{
+	__u32 *opt = (__u32 *) (ip + 1);
+	int opti;
+
+	if (f) {
+		printk(KERN_INFO "Packet log: %s ",chainlabel);
+		printk("%s ",branchname(f->branch,f->simplebranch));
+		if (f->simplebranch==FW_REDIRECT)
+			printk("%d ",f->ipfw.fw_redirpt);
+	}
+
+	printk("%s PROTO=%d %d.%d.%d.%d:%hu %d.%d.%d.%d:%hu"
+	       " L=%hu S=0x%2.2hX I=%hu F=0x%4.4hX T=%hu",
+	       ifname, ip->protocol,
+	       (ntohl(ip->saddr)>>24)&0xFF,
+	       (ntohl(ip->saddr)>>16)&0xFF,
+	       (ntohl(ip->saddr)>>8)&0xFF,
+	       (ntohl(ip->saddr))&0xFF,
+	       src_port,
+	       (ntohl(ip->daddr)>>24)&0xFF,
+	       (ntohl(ip->daddr)>>16)&0xFF,
+	       (ntohl(ip->daddr)>>8)&0xFF,
+	       (ntohl(ip->daddr))&0xFF,
+	       dst_port,
+	       ntohs(ip->tot_len), ip->tos, ntohs(ip->id),
+	       ntohs(ip->frag_off), ip->ttl);
+
+	for (opti = 0; opti < (ip->ihl - sizeof(struct iphdr) / 4); opti++)
+		printk(" O=0x%8.8X", *opt++);
+	printk(" %s(#%d)\n", syn ? "SYN " : /* "PENANCE" */ "", count);
+}
+
+/* function for checking chain labels for user space. */
+static int check_label(ip_chainlabel label)
+{
+	unsigned int i;
+	/* strlen must be < IP_FW_MAX_LABEL_LENGTH. */
+	for (i = 0; i < IP_FW_MAX_LABEL_LENGTH + 1; i++)
+		if (label[i] == '\0') return 1;
+
+	return 0;
+}
+
+/*	This function returns a pointer to the first chain with a label
+ *	that matches the one given. */
+static struct ip_chain *find_label(ip_chainlabel label)
+{
+	struct ip_chain *tmp;
+	FWC_HAVE_LOCK(fwc_rlocks | fwc_wlocks);
+	for (tmp = ip_fw_chains; tmp; tmp = tmp->next)
+		if (strcmp(tmp->label,label) == 0)
+			break;
+	return tmp;
+}
+
+/* This function returns a boolean which when true sets answer to one
+   of the FW_*. */
+static int find_special(ip_chainlabel label, int *answer)
+{
+	if (label[0] == '\0') {
+		*answer = FW_SKIP; /* => pass-through rule */
+		return 1;
+	} else if (strcmp(label,IP_FW_LABEL_ACCEPT) == 0) {
+		*answer = FW_ACCEPT;
+		return 1;
+	} else if (strcmp(label,IP_FW_LABEL_BLOCK) == 0) {
+		*answer = FW_BLOCK;
+		return 1;
+	} else if (strcmp(label,IP_FW_LABEL_REJECT) == 0) {
+		*answer = FW_REJECT;
+		return 1;
+	} else if (strcmp(label,IP_FW_LABEL_REDIRECT) == 0) {
+		*answer = FW_REDIRECT;
+		return 1;
+	} else if (strcmp(label,IP_FW_LABEL_MASQUERADE) == 0) {
+		*answer = FW_MASQUERADE;
+		return 1;
+	} else if (strcmp(label, IP_FW_LABEL_RETURN) == 0) {
+		*answer = FW_SKIP+1;
+		return 1;
+	} else {
+		return 0;
+	}
+}
+
+/* This function cleans up the prevchain and prevrule.  If the verbose
+ * flag is set then he names of the chains will be printed as it
+ * cleans up.  */
+static void cleanup(struct ip_chain *chain,
+		    const int verbose,
+		    unsigned int slot)
+{
+	struct ip_chain *tmpchain = chain->reent[slot].prevchain;
+	if (verbose)
+		printk(KERN_ERR "Chain backtrace: ");
+	while (tmpchain) {
+		if (verbose)
+			printk("%s<-",chain->label);
+		chain->reent[slot].prevchain = NULL;
+		chain = tmpchain;
+		tmpchain = chain->reent[slot].prevchain;
+	}
+	if (verbose)
+		printk("%s\n",chain->label);
+}
+
+static inline int
+ip_fw_domatch(struct ip_fwkernel *f,
+	      struct iphdr *ip,
+	      const char *rif,
+	      const ip_chainlabel label,
+	      struct sk_buff *skb,
+	      unsigned int slot,
+	      __u16 src_port, __u16 dst_port,
+	      unsigned int count,
+	      int tcpsyn)
+{
+	f->counters[slot].bcnt+=ntohs(ip->tot_len);
+	f->counters[slot].pcnt++;
+	if (f->ipfw.fw_flg & IP_FW_F_PRN) {
+		dump_packet(ip,rif,f,label,src_port,dst_port,count,tcpsyn);
+	}
+	ip->tos = (ip->tos & f->ipfw.fw_tosand) ^ f->ipfw.fw_tosxor;
+
+/* This functionality is useless in stock 2.0.x series, but we don't
+ * discard the mark thing altogether, to avoid breaking ipchains (and,
+ * more importantly, the ipfwadm wrapper) --PR */
+	if (f->ipfw.fw_flg & IP_FW_F_MARKABS) {
+		skb->nfmark = f->ipfw.fw_mark;
+	} else {
+		skb->nfmark += f->ipfw.fw_mark;
+	}
+	if (f->ipfw.fw_flg & IP_FW_F_NETLINK) {
+#if defined(CONFIG_NETLINK_DEV) || defined(CONFIG_NETLINK_DEV_MODULE)
+		size_t len = min(f->ipfw.fw_outputsize, ntohs(ip->tot_len))
+			+ sizeof(__u32) + sizeof(skb->nfmark) + IFNAMSIZ;
+		struct sk_buff *outskb=alloc_skb(len, GFP_ATOMIC);
+
+		duprintf("Sending packet out NETLINK (length = %u).\n",
+			 (unsigned int)len);
+		if (outskb) {
+			/* Prepend length, mark & interface */
+			skb_put(outskb, len);
+			*((__u32 *)outskb->data) = (__u32)len;
+			*((__u32 *)(outskb->data+sizeof(__u32))) = skb->nfmark;
+			strcpy(outskb->data+sizeof(__u32)*2, rif);
+			memcpy(outskb->data+sizeof(__u32)*2+IFNAMSIZ, ip,
+			       len-(sizeof(__u32)*2+IFNAMSIZ));
+			netlink_broadcast(ipfwsk, outskb, 0, ~0, GFP_KERNEL);
+		}
+		else {
+#endif
+			if (net_ratelimit())
+				printk(KERN_WARNING "ip_fw: packet drop due to "
+				       "netlink failure\n");
+			return 0;
+#if defined(CONFIG_NETLINK_DEV) || defined(CONFIG_NETLINK_DEV_MODULE)
+		}
+#endif
+	}
+	return 1;
+}
+
+/*
+ *	Returns one of the generic firewall policies, like FW_ACCEPT.
+ *
+ *	The testing is either false for normal firewall mode or true for
+ *	user checking mode (counters are not updated, TOS & mark not done).
+ */
+static int
+ip_fw_check(struct iphdr *ip,
+	    const char *rif,
+	    __u16 *redirport,
+	    struct ip_chain *chain,
+	    struct sk_buff *skb,
+	    unsigned int slot,
+	    int testing)
+{
+	struct tcphdr		*tcp=(struct tcphdr *)((__u32 *)ip+ip->ihl);
+	struct udphdr		*udp=(struct udphdr *)((__u32 *)ip+ip->ihl);
+	struct icmphdr		*icmp=(struct icmphdr *)((__u32 *)ip+ip->ihl);
+	__u32			src, dst;
+	__u16			src_port = 0xFFFF, dst_port = 0xFFFF;
+	char			tcpsyn=0;
+	__u16			offset;
+	unsigned char		oldtos;
+	struct ip_fwkernel	*f;
+	int			ret = FW_SKIP+2;
+	unsigned int		count;
+
+	/* We handle fragments by dealing with the first fragment as
+	 * if it was a normal packet.  All other fragments are treated
+	 * normally, except that they will NEVER match rules that ask
+	 * things we don't know, ie. tcp syn flag or ports).  If the
+	 * rule is also a fragment-specific rule, non-fragments won't
+	 * match it. */
+
+	offset = ntohs(ip->frag_off) & IP_OFFSET;
+
+	/*
+	 *	Don't allow a fragment of TCP 8 bytes in. Nobody
+	 *	normal causes this. Its a cracker trying to break
+	 *	in by doing a flag overwrite to pass the direction
+	 *	checks.
+	 */
+	if (offset == 1 && ip->protocol == IPPROTO_TCP)	{
+		if (!testing && net_ratelimit()) {
+			printk("Suspect TCP fragment.\n");
+			dump_packet(ip,rif,NULL,NULL,0,0,0,0);
+		}
+		return FW_BLOCK;
+	}
+
+	/* If we can't investigate ports, treat as fragment.  It's
+	 * either a trucated whole packet, or a truncated first
+	 * fragment, or a TCP first fragment of length 8-15, in which
+	 * case the above rule stops reassembly.
+	 */
+	if (offset == 0) {
+		unsigned int size_req;
+		switch (ip->protocol) {
+		case IPPROTO_TCP:
+			/* Don't care about things past flags word */
+			size_req = 16;
+			break;
+
+		case IPPROTO_UDP:
+		case IPPROTO_ICMP:
+			size_req = 8;
+			break;
+
+		default:
+			size_req = 0;
+		}
+
+		/* If it is a truncated first fragment then it can be
+		 * used to rewrite port information, and thus should
+		 * be blocked.
+		 */
+		if (ntohs(ip->tot_len) < (ip->ihl<<2)+size_req) {
+			if (!testing && net_ratelimit()) {
+				printk("Suspect short first fragment.\n");
+				dump_packet(ip,rif,NULL,NULL,0,0,0,0);
+			}
+			return FW_BLOCK;
+		}
+	}
+
+	src = ip->saddr;
+	dst = ip->daddr;
+	oldtos = ip->tos;
+
+	/*
+	 *	If we got interface from which packet came
+	 *	we can use the address directly. Linux 2.1 now uses address
+	 *	chains per device too, but unlike BSD we first check if the
+	 *	incoming packet matches a device address and the routing
+	 *	table before calling the firewall.
+	 */
+
+	dprintf("Packet ");
+	switch(ip->protocol)
+	{
+		case IPPROTO_TCP:
+			dprintf("TCP ");
+			if (!offset) {
+				src_port=ntohs(tcp->source);
+				dst_port=ntohs(tcp->dest);
+
+				/* Connection initilisation can only
+				 * be made when the syn bit is set and
+				 * neither of the ack or reset is
+				 * set. */
+				if(tcp->syn && !(tcp->ack || tcp->rst))
+					tcpsyn=1;
+			}
+			break;
+		case IPPROTO_UDP:
+			dprintf("UDP ");
+			if (!offset) {
+				src_port=ntohs(udp->source);
+				dst_port=ntohs(udp->dest);
+			}
+			break;
+		case IPPROTO_ICMP:
+			if (!offset) {
+				src_port=(__u16)icmp->type;
+				dst_port=(__u16)icmp->code;
+			}
+			dprintf("ICMP ");
+			break;
+		default:
+			dprintf("p=%d ",ip->protocol);
+			break;
+	}
+#ifdef DEBUG_IP_FIREWALL
+	print_ip(ip->saddr);
+
+	if (offset)
+		dprintf(":fragment (%i) ", ((int)offset)<<2);
+	else if (ip->protocol==IPPROTO_TCP || ip->protocol==IPPROTO_UDP
+		 || ip->protocol==IPPROTO_ICMP)
+		dprintf(":%hu:%hu", src_port, dst_port);
+	dprintf("\n");
+#endif
+
+	if (!testing) FWC_READ_LOCK(&ip_fw_lock);
+	else FWC_HAVE_LOCK(fwc_rlocks);
+
+	f = chain->chain;
+	do {
+		count = 0;
+		for (; f; f = f->next) {
+			count++;
+			if (ip_rule_match(f,rif,ip,
+					  tcpsyn,src_port,dst_port,offset)) {
+				if (!testing
+				    && !ip_fw_domatch(f, ip, rif, chain->label,
+						      skb, slot,
+						      src_port, dst_port,
+						      count, tcpsyn)) {
+					ret = FW_BLOCK;
+					goto out;
+				}
+				break;
+			}
+		}
+		if (f) {
+			if (f->branch) {
+				/* Do sanity check to see if we have
+                                 * already set prevchain and if so we
+                                 * must be in a loop */
+				if (f->branch->reent[slot].prevchain) {
+					if (!testing) {
+						printk(KERN_ERR
+						       "IP firewall: "
+						       "Loop detected "
+						       "at `%s'.\n",
+						       f->branch->label);
+						cleanup(chain, 1, slot);
+						ret = FW_BLOCK;
+					} else {
+						cleanup(chain, 0, slot);
+						ret = FW_SKIP+1;
+					}
+				}
+				else {
+					f->branch->reent[slot].prevchain
+						= chain;
+					f->branch->reent[slot].prevrule
+						= f->next;
+					chain = f->branch;
+					f = chain->chain;
+				}
+			}
+			else if (f->simplebranch == FW_SKIP)
+				f = f->next;
+			else if (f->simplebranch == FW_SKIP+1) {
+				/* Just like falling off the chain */
+				goto fall_off_chain;
+			} else {
+				cleanup(chain, 0, slot);
+				ret = f->simplebranch;
+			}
+		} /* f == NULL */
+		else {
+		fall_off_chain:
+			if (chain->reent[slot].prevchain) {
+				struct ip_chain *tmp = chain;
+				f = chain->reent[slot].prevrule;
+				chain = chain->reent[slot].prevchain;
+				tmp->reent[slot].prevchain = NULL;
+			}
+			else {
+				ret = chain->policy;
+				if (!testing) {
+					chain->reent[slot].counters.pcnt++;
+					chain->reent[slot].counters.bcnt
+						+= ntohs(ip->tot_len);
+				}
+			}
+		}
+	} while (ret == FW_SKIP+2);
+
+ out:
+	if (!testing) FWC_READ_UNLOCK(&ip_fw_lock);
+
+	/* Recalculate checksum if not going to reject, and TOS changed. */
+	if (ip->tos != oldtos
+	    && ret != FW_REJECT && ret != FW_BLOCK
+	    && !testing)
+		ip_send_check(ip);
+
+	if (ret == FW_REDIRECT && redirport) {
+		if ((*redirport = htons(f->ipfw.fw_redirpt)) == 0) {
+			/* Wildcard redirection.
+			 * Note that redirport will become
+			 * 0xFFFF for non-TCP/UDP packets.
+			 */
+			*redirport = htons(dst_port);
+		}
+	}
+
+#ifdef DEBUG_ALLOW_ALL
+	return (testing ? ret : FW_ACCEPT);
+#else
+	return ret;
+#endif
+}
+
+/* Must have write lock & interrupts off for any of these */
+
+/* This function sets all the byte counters in a chain to zero.  The
+ * input is a pointer to the chain required for zeroing */
+static int zero_fw_chain(struct ip_chain *chainptr)
+{
+	struct ip_fwkernel *i;
+
+	FWC_HAVE_LOCK(fwc_wlocks);
+	for (i = chainptr->chain; i; i = i->next)
+		memset(i->counters, 0, sizeof(struct ip_counters)*NUM_SLOTS);
+	return 0;
+}
+
+static int clear_fw_chain(struct ip_chain *chainptr)
+{
+	struct ip_fwkernel *i= chainptr->chain;
+
+	FWC_HAVE_LOCK(fwc_wlocks);
+	chainptr->chain=NULL;
+
+	while (i) {
+		struct ip_fwkernel *tmp = i->next;
+		if (i->branch)
+			i->branch->refcount--;
+		kfree(i);
+		i = tmp;
+	}
+	return 0;
+}
+
+static int replace_in_chain(struct ip_chain *chainptr,
+			    struct ip_fwkernel *frwl,
+			    __u32 position)
+{
+	struct ip_fwkernel *f = chainptr->chain;
+
+	FWC_HAVE_LOCK(fwc_wlocks);
+
+	while (--position && f != NULL) f = f->next;
+	if (f == NULL)
+		return EINVAL;
+
+	if (f->branch) f->branch->refcount--;
+	if (frwl->branch) frwl->branch->refcount++;
+
+	frwl->next = f->next;
+	memcpy(f,frwl,sizeof(struct ip_fwkernel));
+	kfree(frwl);
+	return 0;
+}
+
+static int append_to_chain(struct ip_chain *chainptr, struct ip_fwkernel *rule)
+{
+	struct ip_fwkernel *i;
+
+	FWC_HAVE_LOCK(fwc_wlocks);
+	/* Special case if no rules already present */
+	if (chainptr->chain == NULL) {
+
+		/* If pointer writes are atomic then turning off
+		 * interupts is not necessary. */
+		chainptr->chain = rule;
+		if (rule->branch) rule->branch->refcount++;
+		return 0;
+	}
+
+	/* Find the rule before the end of the chain */
+	for (i = chainptr->chain; i->next; i = i->next);
+	i->next = rule;
+	if (rule->branch) rule->branch->refcount++;
+	return 0;
+}
+
+/* This function inserts a rule at the position of position in the
+ * chain refenced by chainptr.  If position is 1 then this rule will
+ * become the new rule one. */
+static int insert_in_chain(struct ip_chain *chainptr,
+			   struct ip_fwkernel *frwl,
+			   __u32 position)
+{
+	struct ip_fwkernel *f = chainptr->chain;
+
+	FWC_HAVE_LOCK(fwc_wlocks);
+	/* special case if the position is number 1 */
+	if (position == 1) {
+		frwl->next = chainptr->chain;
+		if (frwl->branch) frwl->branch->refcount++;
+		chainptr->chain = frwl;
+		return 0;
+	}
+	position--;
+	while (--position && f != NULL) f = f->next;
+	if (f == NULL)
+		return EINVAL;
+	if (frwl->branch) frwl->branch->refcount++;
+	frwl->next = f->next;
+
+	f->next = frwl;
+	return 0;
+}
+
+/* This function deletes the a rule from a given rulenum and chain.
+ * With rulenum = 1 is the first rule is deleted. */
+
+static int del_num_from_chain(struct ip_chain *chainptr, __u32 rulenum)
+{
+	struct ip_fwkernel *i=chainptr->chain,*tmp;
+
+	FWC_HAVE_LOCK(fwc_wlocks);
+
+	if (!chainptr->chain)
+		return ENOENT;
+
+	/* Need a special case for the first rule */
+	if (rulenum == 1) {
+		/* store temp to allow for freeing up of memory */
+		tmp = chainptr->chain;
+	        if (chainptr->chain->branch) chainptr->chain->branch->refcount--;
+		chainptr->chain = chainptr->chain->next;
+		kfree(tmp); /* free memory that is now unused */
+	} else {
+		rulenum--;
+		while (--rulenum && i->next ) i = i->next;
+		if (!i->next)
+			return ENOENT;
+		tmp = i->next;
+		if (i->next->branch)
+			i->next->branch->refcount--;
+		i->next = i->next->next;
+		kfree(tmp);
+	}
+	return 0;
+}
+
+
+/* This function deletes the a rule from a given rule and chain.
+ * The rule that is deleted is the first occursance of that rule. */
+static int del_rule_from_chain(struct ip_chain *chainptr,
+			       struct ip_fwkernel *frwl)
+{
+	struct ip_fwkernel *ltmp,*ftmp = chainptr->chain ;
+	int was_found;
+
+	FWC_HAVE_LOCK(fwc_wlocks);
+
+	/* Sure, we should compare marks, but since the `ipfwadm'
+	 * script uses it for an unholy hack... well, life is easier
+	 * this way.  We also mask it out of the flags word. --PR */
+	for (ltmp=NULL, was_found=0;
+	     !was_found && ftmp != NULL;
+	     ltmp = ftmp,ftmp = ftmp->next) {
+		if (ftmp->ipfw.fw_src.s_addr!=frwl->ipfw.fw_src.s_addr
+		    || ftmp->ipfw.fw_dst.s_addr!=frwl->ipfw.fw_dst.s_addr
+		    || ftmp->ipfw.fw_smsk.s_addr!=frwl->ipfw.fw_smsk.s_addr
+		    || ftmp->ipfw.fw_dmsk.s_addr!=frwl->ipfw.fw_dmsk.s_addr
+#if 0
+		    || ftmp->ipfw.fw_flg!=frwl->ipfw.fw_flg
+#else
+		    || ((ftmp->ipfw.fw_flg & ~IP_FW_F_MARKABS)
+			!= (frwl->ipfw.fw_flg & ~IP_FW_F_MARKABS))
+#endif
+		    || ftmp->ipfw.fw_invflg!=frwl->ipfw.fw_invflg
+		    || ftmp->ipfw.fw_proto!=frwl->ipfw.fw_proto
+#if 0
+		    || ftmp->ipfw.fw_mark!=frwl->ipfw.fw_mark
+#endif
+		    || ftmp->ipfw.fw_redirpt!=frwl->ipfw.fw_redirpt
+		    || ftmp->ipfw.fw_spts[0]!=frwl->ipfw.fw_spts[0]
+		    || ftmp->ipfw.fw_spts[1]!=frwl->ipfw.fw_spts[1]
+		    || ftmp->ipfw.fw_dpts[0]!=frwl->ipfw.fw_dpts[0]
+		    || ftmp->ipfw.fw_dpts[1]!=frwl->ipfw.fw_dpts[1]
+		    || ftmp->ipfw.fw_outputsize!=frwl->ipfw.fw_outputsize) {
+			duprintf("del_rule_from_chain: mismatch:"
+				 "src:%u/%u dst:%u/%u smsk:%u/%u dmsk:%u/%u "
+				 "flg:%hX/%hX invflg:%hX/%hX proto:%u/%u "
+				 "mark:%u/%u "
+				 "ports:%hu-%hu/%hu-%hu %hu-%hu/%hu-%hu "
+				 "outputsize:%hu-%hu\n",
+				 ftmp->ipfw.fw_src.s_addr,
+				 frwl->ipfw.fw_src.s_addr,
+				 ftmp->ipfw.fw_dst.s_addr,
+				 frwl->ipfw.fw_dst.s_addr,
+				 ftmp->ipfw.fw_smsk.s_addr,
+				 frwl->ipfw.fw_smsk.s_addr,
+				 ftmp->ipfw.fw_dmsk.s_addr,
+				 frwl->ipfw.fw_dmsk.s_addr,
+				 ftmp->ipfw.fw_flg,
+				 frwl->ipfw.fw_flg,
+				 ftmp->ipfw.fw_invflg,
+				 frwl->ipfw.fw_invflg,
+				 ftmp->ipfw.fw_proto,
+				 frwl->ipfw.fw_proto,
+				 ftmp->ipfw.fw_mark,
+				 frwl->ipfw.fw_mark,
+				 ftmp->ipfw.fw_spts[0],
+				 frwl->ipfw.fw_spts[0],
+				 ftmp->ipfw.fw_spts[1],
+				 frwl->ipfw.fw_spts[1],
+				 ftmp->ipfw.fw_dpts[0],
+				 frwl->ipfw.fw_dpts[0],
+				 ftmp->ipfw.fw_dpts[1],
+				 frwl->ipfw.fw_dpts[1],
+				 ftmp->ipfw.fw_outputsize,
+				 frwl->ipfw.fw_outputsize);
+			continue;
+		}
+
+		if (strncmp(ftmp->ipfw.fw_vianame,
+			    frwl->ipfw.fw_vianame,
+			    IFNAMSIZ)) {
+			duprintf("del_rule_from_chain: if mismatch: %s/%s\n",
+				 ftmp->ipfw.fw_vianame,
+				 frwl->ipfw.fw_vianame);
+		        continue;
+		}
+		if (ftmp->branch != frwl->branch) {
+			duprintf("del_rule_from_chain: branch mismatch: "
+				 "%s/%s\n",
+				 ftmp->branch?ftmp->branch->label:"(null)",
+				 frwl->branch?frwl->branch->label:"(null)");
+			continue;
+		}
+		if (ftmp->branch == NULL
+		    && ftmp->simplebranch != frwl->simplebranch) {
+			duprintf("del_rule_from_chain: simplebranch mismatch: "
+				 "%i/%i\n",
+				 ftmp->simplebranch, frwl->simplebranch);
+			continue;
+		}
+		was_found = 1;
+		if (ftmp->branch)
+			ftmp->branch->refcount--;
+		if (ltmp)
+			ltmp->next = ftmp->next;
+		else
+			chainptr->chain = ftmp->next;
+		kfree(ftmp);
+		break;
+	}
+
+	if (was_found)
+		return 0;
+	else {
+		duprintf("del_rule_from_chain: no matching rule found\n");
+		return EINVAL;
+	}
+}
+
+/* This function takes the label of a chain and deletes the first
+ * chain with that name.  No special cases required for the built in
+ * chains as they have their refcount initilised to 1 so that they are
+ * never deleted.  */
+static int del_chain(ip_chainlabel label)
+{
+	struct ip_chain *tmp,*tmp2;
+
+	FWC_HAVE_LOCK(fwc_wlocks);
+	/* Corner case: return EBUSY not ENOENT for first elem ("input") */
+	if (strcmp(label, ip_fw_chains->label) == 0)
+		return EBUSY;
+
+	for (tmp = ip_fw_chains; tmp->next; tmp = tmp->next)
+		if(strcmp(tmp->next->label,label) == 0)
+			break;
+
+	tmp2 = tmp->next;
+	if (!tmp2)
+		return ENOENT;
+
+	if (tmp2->refcount)
+		return EBUSY;
+
+	if (tmp2->chain)
+		return ENOTEMPTY;
+
+	tmp->next = tmp2->next;
+	kfree(tmp2);
+	return 0;
+}
+
+/* This is a function to initilise a chain.  Built in rules start with
+ * refcount = 1 so that they cannot be deleted.  User defined rules
+ * start with refcount = 0 so they can be deleted. */
+static struct ip_chain *ip_init_chain(ip_chainlabel name,
+				      __u32 ref,
+				      int policy)
+{
+	unsigned int i;
+	struct ip_chain *label
+		= kmalloc(SIZEOF_STRUCT_IP_CHAIN, GFP_KERNEL);
+	if (label == NULL)
+		panic("Can't kmalloc for firewall chains.\n");
+	strcpy(label->label,name);
+	label->next = NULL;
+	label->chain = NULL;
+	label->refcount = ref;
+	label->policy = policy;
+	for (i = 0; i < smp_num_cpus*2; i++) {
+		label->reent[i].counters.pcnt = label->reent[i].counters.bcnt
+			= 0;
+		label->reent[i].prevchain = NULL;
+		label->reent[i].prevrule = NULL;
+	}
+
+	return label;
+}
+
+/* This is a function for reating a new chain.  The chains is not
+ * created if a chain of the same name already exists */
+static int create_chain(ip_chainlabel label)
+{
+	struct ip_chain *tmp;
+
+	if (!check_label(label))
+		return EINVAL;
+
+	FWC_HAVE_LOCK(fwc_wlocks);
+	for (tmp = ip_fw_chains; tmp->next; tmp = tmp->next)
+		if (strcmp(tmp->label,label) == 0)
+			return EEXIST;
+
+	if (strcmp(tmp->label,label) == 0)
+		return EEXIST;
+
+	tmp->next = ip_init_chain(label, 0, FW_SKIP); /* refcount is
+					      * zero since this is a
+					      * user defined chain *
+					      * and therefore can be
+					      * deleted */
+	return 0;
+}
+
+/* This function simply changes the policy on one of the built in
+ * chains.  checking must be done before this is call to ensure that
+ * chainptr is pointing to one of the three possible chains */
+static int change_policy(struct ip_chain *chainptr, int policy)
+{
+	FWC_HAVE_LOCK(fwc_wlocks);
+	chainptr->policy = policy;
+	return 0;
+}
+
+/* This function takes an ip_fwuser and converts it to a ip_fwkernel.  It also
+ * performs some checks in the structure. */
+static struct ip_fwkernel *convert_ipfw(struct ip_fwuser *fwuser, int *errno)
+{
+	struct ip_fwkernel *fwkern;
+
+	if ( (fwuser->ipfw.fw_flg & ~IP_FW_F_MASK) != 0 ) {
+		duprintf("convert_ipfw: undefined flag bits set (flags=%x)\n",
+			 fwuser->ipfw.fw_flg);
+		*errno = EINVAL;
+		return NULL;
+	}
+
+#ifdef DEBUG_IP_FIREWALL_USER
+	/* These are sanity checks that don't really matter.
+	 * We can get rid of these once testing is complete.
+	 */
+	if ((fwuser->ipfw.fw_flg & IP_FW_F_TCPSYN)
+	    && ((fwuser->ipfw.fw_invflg & IP_FW_INV_PROTO)
+		|| fwuser->ipfw.fw_proto != IPPROTO_TCP)) {
+		duprintf("convert_ipfw: TCP SYN flag set but proto != TCP!\n");
+		*errno = EINVAL;
+		return NULL;
+	}
+
+	if (strcmp(fwuser->label, IP_FW_LABEL_REDIRECT) != 0
+	    && fwuser->ipfw.fw_redirpt != 0) {
+		duprintf("convert_ipfw: Target not REDIR but redirpt != 0!\n");
+		*errno = EINVAL;
+		return NULL;
+	}
+
+	if ((!(fwuser->ipfw.fw_flg & IP_FW_F_FRAG)
+	     && (fwuser->ipfw.fw_invflg & IP_FW_INV_FRAG))
+	    || (!(fwuser->ipfw.fw_flg & IP_FW_F_TCPSYN)
+		&& (fwuser->ipfw.fw_invflg & IP_FW_INV_SYN))) {
+		duprintf("convert_ipfw: Can't have INV flag if flag unset!\n");
+		*errno = EINVAL;
+		return NULL;
+	}
+
+	if (((fwuser->ipfw.fw_invflg & IP_FW_INV_SRCPT)
+	     && fwuser->ipfw.fw_spts[0] == 0
+	     && fwuser->ipfw.fw_spts[1] == 0xFFFF)
+	    || ((fwuser->ipfw.fw_invflg & IP_FW_INV_DSTPT)
+		&& fwuser->ipfw.fw_dpts[0] == 0
+		&& fwuser->ipfw.fw_dpts[1] == 0xFFFF)
+	    || ((fwuser->ipfw.fw_invflg & IP_FW_INV_VIA)
+		&& (fwuser->ipfw.fw_vianame)[0] == '\0')
+	    || ((fwuser->ipfw.fw_invflg & IP_FW_INV_SRCIP)
+		&& fwuser->ipfw.fw_smsk.s_addr == 0)
+	    || ((fwuser->ipfw.fw_invflg & IP_FW_INV_DSTIP)
+		&& fwuser->ipfw.fw_dmsk.s_addr == 0)) {
+		duprintf("convert_ipfw: INV flag makes rule unmatchable!\n");
+		*errno = EINVAL;
+		return NULL;
+	}
+
+	if ((fwuser->ipfw.fw_flg & IP_FW_F_FRAG)
+	    && !(fwuser->ipfw.fw_invflg & IP_FW_INV_FRAG)
+	    && (fwuser->ipfw.fw_spts[0] != 0
+		|| fwuser->ipfw.fw_spts[1] != 0xFFFF
+		|| fwuser->ipfw.fw_dpts[0] != 0
+		|| fwuser->ipfw.fw_dpts[1] != 0xFFFF
+		|| (fwuser->ipfw.fw_flg & IP_FW_F_TCPSYN))) {
+		duprintf("convert_ipfw: Can't test ports or SYN with frag!\n");
+		*errno = EINVAL;
+		return NULL;
+	}
+#endif
+
+	if ((fwuser->ipfw.fw_spts[0] != 0
+	     || fwuser->ipfw.fw_spts[1] != 0xFFFF
+	     || fwuser->ipfw.fw_dpts[0] != 0
+	     || fwuser->ipfw.fw_dpts[1] != 0xFFFF)
+	    && ((fwuser->ipfw.fw_invflg & IP_FW_INV_PROTO)
+		|| (fwuser->ipfw.fw_proto != IPPROTO_TCP
+		    && fwuser->ipfw.fw_proto != IPPROTO_UDP
+		    && fwuser->ipfw.fw_proto != IPPROTO_ICMP))) {
+		duprintf("convert_ipfw: Can only test ports for TCP/UDP/ICMP!\n");
+		*errno = EINVAL;
+		return NULL;
+	}
+
+	fwkern = kmalloc(SIZEOF_STRUCT_IP_FW_KERNEL, GFP_KERNEL);
+	if (!fwkern) {
+		duprintf("convert_ipfw: kmalloc failed!\n");
+		*errno = ENOMEM;
+		return NULL;
+	}
+	memcpy(&fwkern->ipfw,&fwuser->ipfw,sizeof(struct ip_fw));
+
+	if (!find_special(fwuser->label, &fwkern->simplebranch)) {
+		fwkern->branch = find_label(fwuser->label);
+		if (!fwkern->branch) {
+			duprintf("convert_ipfw: chain doesn't exist `%s'.\n",
+				 fwuser->label);
+			kfree(fwkern);
+			*errno = ENOENT;
+			return NULL;
+		} else if (fwkern->branch == IP_FW_INPUT_CHAIN
+			   || fwkern->branch == IP_FW_FORWARD_CHAIN
+			   || fwkern->branch == IP_FW_OUTPUT_CHAIN) {
+			duprintf("convert_ipfw: Can't branch to builtin chain `%s'.\n",
+				 fwuser->label);
+			kfree(fwkern);
+			*errno = ENOENT;
+			return NULL;
+		}
+	} else
+		fwkern->branch = NULL;
+	memset(fwkern->counters, 0, sizeof(struct ip_counters)*NUM_SLOTS);
+
+	/* Handle empty vianame by making it a wildcard */
+	if ((fwkern->ipfw.fw_vianame)[0] == '\0')
+	    fwkern->ipfw.fw_flg |= IP_FW_F_WILDIF;
+
+	fwkern->next = NULL;
+	return fwkern;
+}
+
+int ip_fw_ctl(int cmd, void *m, int len)
+{
+	int ret;
+	struct ip_chain *chain;
+	unsigned long flags;
+
+	FWC_WRITE_LOCK_IRQ(&ip_fw_lock, flags);
+
+	switch (cmd) {
+	case IP_FW_FLUSH:
+		if (len != sizeof(ip_chainlabel) || !check_label(m))
+			ret = EINVAL;
+		else if ((chain = find_label(m)) == NULL)
+			ret = ENOENT;
+		else ret = clear_fw_chain(chain);
+		break;
+
+	case IP_FW_ZERO:
+		if (len != sizeof(ip_chainlabel) || !check_label(m))
+			ret = EINVAL;
+		else if ((chain = find_label(m)) == NULL)
+			ret = ENOENT;
+		else ret = zero_fw_chain(chain);
+		break;
+
+	case IP_FW_CHECK: {
+		struct ip_fwtest *new = m;
+		struct iphdr *ip;
+
+		/* Don't need write lock. */
+		FWC_WRITE_UNLOCK_IRQ(&ip_fw_lock, flags);
+
+		if (len != sizeof(struct ip_fwtest) || !check_label(m))
+			return EINVAL;
+
+		/* Need readlock to do find_label */
+		FWC_READ_LOCK(&ip_fw_lock);
+
+		if ((chain = find_label(new->fwt_label)) == NULL)
+			ret = ENOENT;
+		else {
+			ip = &(new->fwt_packet.fwp_iph);
+
+			if (ip->ihl != sizeof(struct iphdr) / sizeof(int)) {
+			    duprintf("ip_fw_ctl: ip->ihl=%d, want %d\n",
+				     ip->ihl,
+				     sizeof(struct iphdr) / sizeof(int));
+			    ret = EINVAL;
+			}
+			else {
+				ret = ip_fw_check(ip, new->fwt_packet.fwp_vianame,
+						  NULL, chain,
+						  NULL, SLOT_NUMBER(), 1);
+				switch (ret) {
+				case FW_ACCEPT:
+					ret = 0; break;
+				case FW_REDIRECT:
+					ret = ECONNABORTED; break;
+				case FW_MASQUERADE:
+					ret = ECONNRESET; break;
+				case FW_REJECT:
+					ret = ECONNREFUSED; break;
+					/* Hack to help diag; these only get
+					   returned when testing. */
+				case FW_SKIP+1:
+					ret = ELOOP; break;
+				case FW_SKIP:
+					ret = ENFILE; break;
+				default: /* FW_BLOCK */
+					ret = ETIMEDOUT; break;
+				}
+			}
+		}
+		FWC_READ_UNLOCK(&ip_fw_lock);
+		return ret;
+	}
+
+	case IP_FW_MASQ_TIMEOUTS: {
+		ret = ip_fw_masq_timeouts(m, len);
+	}
+	break;
+
+	case IP_FW_REPLACE: {
+		struct ip_fwkernel *ip_fwkern;
+		struct ip_fwnew *new = m;
+
+		if (len != sizeof(struct ip_fwnew)
+		    || !check_label(new->fwn_label))
+			ret = EINVAL;
+		else if ((chain = find_label(new->fwn_label)) == NULL)
+			ret = ENOENT;
+		else if ((ip_fwkern = convert_ipfw(&new->fwn_rule, &ret))
+			 != NULL)
+			ret = replace_in_chain(chain, ip_fwkern,
+					       new->fwn_rulenum);
+	}
+	break;
+
+	case IP_FW_APPEND: {
+		struct ip_fwchange *new = m;
+		struct ip_fwkernel *ip_fwkern;
+
+		if (len != sizeof(struct ip_fwchange)
+		    || !check_label(new->fwc_label))
+			ret = EINVAL;
+		else if ((chain = find_label(new->fwc_label)) == NULL)
+			ret = ENOENT;
+		else if ((ip_fwkern = convert_ipfw(&new->fwc_rule, &ret))
+			 != NULL)
+			ret = append_to_chain(chain, ip_fwkern);
+	}
+	break;
+
+	case IP_FW_INSERT: {
+		struct ip_fwkernel *ip_fwkern;
+		struct ip_fwnew *new = m;
+
+		if (len != sizeof(struct ip_fwnew)
+		    || !check_label(new->fwn_label))
+			ret = EINVAL;
+		else if ((chain = find_label(new->fwn_label)) == NULL)
+			ret = ENOENT;
+		else if ((ip_fwkern = convert_ipfw(&new->fwn_rule, &ret))
+			 != NULL)
+			ret = insert_in_chain(chain, ip_fwkern,
+					      new->fwn_rulenum);
+	}
+	break;
+
+	case IP_FW_DELETE: {
+		struct ip_fwchange *new = m;
+		struct ip_fwkernel *ip_fwkern;
+
+		if (len != sizeof(struct ip_fwchange)
+		    || !check_label(new->fwc_label))
+			ret = EINVAL;
+		else if ((chain = find_label(new->fwc_label)) == NULL)
+			ret = ENOENT;
+		else if ((ip_fwkern = convert_ipfw(&new->fwc_rule, &ret))
+			 != NULL) {
+			ret = del_rule_from_chain(chain, ip_fwkern);
+			kfree(ip_fwkern);
+		}
+	}
+	break;
+
+	case IP_FW_DELETE_NUM: {
+		struct ip_fwdelnum *new = m;
+
+		if (len != sizeof(struct ip_fwdelnum)
+		    || !check_label(new->fwd_label))
+			ret = EINVAL;
+		else if ((chain = find_label(new->fwd_label)) == NULL)
+			ret = ENOENT;
+		else ret = del_num_from_chain(chain, new->fwd_rulenum);
+	}
+	break;
+
+	case IP_FW_CREATECHAIN: {
+		if (len != sizeof(ip_chainlabel)) {
+			duprintf("create_chain: bad size %i\n", len);
+			ret = EINVAL;
+		}
+		else ret = create_chain(m);
+	}
+	break;
+
+	case IP_FW_DELETECHAIN: {
+		if (len != sizeof(ip_chainlabel)) {
+			duprintf("delete_chain: bad size %i\n", len);
+			ret = EINVAL;
+		}
+		else ret = del_chain(m);
+	}
+	break;
+
+	case IP_FW_POLICY: {
+		struct ip_fwpolicy *new = m;
+
+		if (len != sizeof(struct ip_fwpolicy)
+		    || !check_label(new->fwp_label))
+			ret = EINVAL;
+		else if ((chain = find_label(new->fwp_label)) == NULL)
+			ret = ENOENT;
+		else if (chain != IP_FW_INPUT_CHAIN
+			 && chain != IP_FW_FORWARD_CHAIN
+			 && chain != IP_FW_OUTPUT_CHAIN) {
+			duprintf("change_policy: can't change policy on user"
+				 " defined chain.\n");
+			ret = EINVAL;
+		}
+		else {
+		        int pol = FW_SKIP;
+			find_special(new->fwp_policy, &pol);
+
+			switch(pol) {
+			case FW_MASQUERADE:
+				if (chain != IP_FW_FORWARD_CHAIN) {
+					ret = EINVAL;
+					break;
+				}
+				/* Fall thru... */
+			case FW_BLOCK:
+			case FW_ACCEPT:
+			case FW_REJECT:
+				ret = change_policy(chain, pol);
+				break;
+			default:
+			        duprintf("change_policy: bad policy `%s'\n",
+					 new->fwp_policy);
+				ret = EINVAL;
+			}
+		}
+		break;
+	}
+	default:
+		duprintf("ip_fw_ctl:  unknown request %d\n",cmd);
+		ret = ENOPROTOOPT;
+	}
+
+	FWC_WRITE_UNLOCK_IRQ(&ip_fw_lock, flags);
+	return ret;
+}
+
+/* Returns bytes used - doesn't NUL terminate */
+static int dump_rule(char *buffer,
+		     const char *chainlabel,
+		     const struct ip_fwkernel *rule)
+{
+	int len;
+	unsigned int i;
+	__u64 packets = 0, bytes = 0;
+
+	FWC_HAVE_LOCK(fwc_wlocks);
+	for (i = 0; i < NUM_SLOTS; i++) {
+		packets += rule->counters[i].pcnt;
+		bytes += rule->counters[i].bcnt;
+	}
+
+	len=sprintf(buffer,
+		    "%9s "			/* Chain name */
+		    "%08X/%08X->%08X/%08X "	/* Source & Destination IPs */
+		    "%.16s "			/* Interface */
+		    "%X %X "			/* fw_flg and fw_invflg fields */
+		    "%u "			/* Protocol */
+		    "%-9u %-9u %-9u %-9u "	/* Packet & byte counters */
+		    "%u-%u %u-%u "		/* Source & Dest port ranges */
+		    "A%02X X%02X "		/* TOS and and xor masks */
+		    "%08X "			/* Redirection port */
+		    "%u "			/* fw_mark field */
+		    "%u "			/* output size */
+		    "%9s\n",			/* Target */
+		    chainlabel,
+		    ntohl(rule->ipfw.fw_src.s_addr),
+		    ntohl(rule->ipfw.fw_smsk.s_addr),
+		    ntohl(rule->ipfw.fw_dst.s_addr),
+		    ntohl(rule->ipfw.fw_dmsk.s_addr),
+		    (rule->ipfw.fw_vianame)[0] ? rule->ipfw.fw_vianame : "-",
+		    rule->ipfw.fw_flg,
+		    rule->ipfw.fw_invflg,
+		    rule->ipfw.fw_proto,
+		    (__u32)(packets >> 32), (__u32)packets,
+		    (__u32)(bytes >> 32), (__u32)bytes,
+		    rule->ipfw.fw_spts[0], rule->ipfw.fw_spts[1],
+		    rule->ipfw.fw_dpts[0], rule->ipfw.fw_dpts[1],
+		    rule->ipfw.fw_tosand, rule->ipfw.fw_tosxor,
+		    rule->ipfw.fw_redirpt,
+		    rule->ipfw.fw_mark,
+		    rule->ipfw.fw_outputsize,
+		    branchname(rule->branch,rule->simplebranch));
+
+	duprintf("dump_rule: %i bytes done.\n", len);
+	return len;
+}
+
+/* File offset is actually in records, not bytes. */
+static int ip_chain_procinfo(char *buffer, char **start,
+			     off_t offset, int length
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,3,29)
+			     , int reset
+#endif
+	)
+{
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,3,29)
+	/* FIXME: No more `atomic' read and reset.  Wonderful 8-( --RR */
+	int reset = 0;
+#endif
+	struct ip_chain *i;
+	struct ip_fwkernel *j = ip_fw_chains->chain;
+	unsigned long flags;
+	int len = 0;
+	int last_len = 0;
+	off_t upto = 0;
+
+	duprintf("Offset starts at %lu\n", offset);
+	duprintf("ip_fw_chains is 0x%0lX\n", (unsigned long int)ip_fw_chains);
+
+	/* Need a write lock to lock out ``readers'' which update counters. */
+	FWC_WRITE_LOCK_IRQ(&ip_fw_lock, flags);
+
+	for (i = ip_fw_chains; i; i = i->next) {
+	    for (j = i->chain; j; j = j->next) {
+		if (upto == offset) break;
+		duprintf("Skipping rule in chain `%s'\n",
+			 i->label);
+		upto++;
+	    }
+	    if (upto == offset) break;
+	}
+
+	/* Don't init j first time, or once i = NULL */
+	for (; i; (void)((i = i->next) && (j = i->chain))) {
+		duprintf("Dumping chain `%s'\n", i->label);
+		for (; j; j = j->next, upto++, last_len = len)
+		{
+			len += dump_rule(buffer+len, i->label, j);
+			if (len > length) {
+				duprintf("Dumped to %i (past %i).  "
+					 "Moving back to %i.\n",
+					 len, length, last_len);
+				len = last_len;
+				goto outside;
+			}
+			else if (reset)
+				memset(j->counters, 0,
+				       sizeof(struct ip_counters)*NUM_SLOTS);
+		}
+	}
+outside:
+	FWC_WRITE_UNLOCK_IRQ(&ip_fw_lock, flags);
+	buffer[len] = '\0';
+
+	duprintf("ip_chain_procinfo: Length = %i (of %i).  Offset = %li.\n",
+		 len, length, upto);
+	/* `start' hack - see fs/proc/generic.c line ~165 */
+	*start=(char *)((unsigned int)upto-offset);
+	return len;
+}
+
+static int ip_chain_name_procinfo(char *buffer, char **start,
+				  off_t offset, int length)
+{
+	struct ip_chain *i;
+	int len = 0,last_len = 0;
+	off_t pos = 0,begin = 0;
+	unsigned long flags;
+
+	/* Need a write lock to lock out ``readers'' which update counters. */
+	FWC_WRITE_LOCK_IRQ(&ip_fw_lock, flags);
+
+	for (i = ip_fw_chains; i; i = i->next)
+	{
+		unsigned int j;
+		__u32 packetsHi = 0, packetsLo = 0, bytesHi = 0, bytesLo = 0;
+
+		for (j = 0; j < NUM_SLOTS; j++) {
+			packetsLo += i->reent[j].counters.pcnt & 0xFFFFFFFF;
+			packetsHi += ((i->reent[j].counters.pcnt >> 32)
+				      & 0xFFFFFFFF);
+			bytesLo += i->reent[j].counters.bcnt & 0xFFFFFFFF;
+			bytesHi += ((i->reent[j].counters.bcnt >> 32)
+				    & 0xFFFFFFFF);
+		}
+
+		/* print the label and the policy */
+		len+=sprintf(buffer+len,"%s %s %i %u %u %u %u\n",
+			     i->label,branchname(NULL, i->policy),i->refcount,
+			     packetsHi, packetsLo, bytesHi, bytesLo);
+		pos=begin+len;
+		if(pos<offset) {
+			len=0;
+			begin=pos;
+		}
+		else if(pos>offset+length) {
+			len = last_len;
+			break;
+		}
+
+		last_len = len;
+	}
+	FWC_WRITE_UNLOCK_IRQ(&ip_fw_lock, flags);
+
+	*start = buffer+(offset-begin);
+	len-=(offset-begin);
+	if(len>length)
+		len=length;
+	return len;
+}
+
+/*
+ *	Interface to the generic firewall chains.
+ */
+int ipfw_input_check(struct firewall_ops *this, int pf,
+		     struct net_device *dev, void *phdr, void *arg,
+		     struct sk_buff **pskb)
+{
+	return ip_fw_check(phdr, dev->name,
+			   arg, IP_FW_INPUT_CHAIN, *pskb, SLOT_NUMBER(), 0);
+}
+
+int ipfw_output_check(struct firewall_ops *this, int pf,
+		      struct net_device *dev, void *phdr, void *arg,
+		      struct sk_buff **pskb)
+{
+	/* Locally generated bogus packets by root. <SIGH>. */
+	if (((struct iphdr *)phdr)->ihl * 4 < sizeof(struct iphdr)
+	    || (*pskb)->len < sizeof(struct iphdr))
+		return FW_ACCEPT;
+	return ip_fw_check(phdr, dev->name,
+			   arg, IP_FW_OUTPUT_CHAIN, *pskb, SLOT_NUMBER(), 0);
+}
+
+int ipfw_forward_check(struct firewall_ops *this, int pf,
+		       struct net_device *dev, void *phdr, void *arg,
+		       struct sk_buff **pskb)
+{
+	return ip_fw_check(phdr, dev->name,
+			   arg, IP_FW_FORWARD_CHAIN, *pskb, SLOT_NUMBER(), 0);
+}
+
+struct firewall_ops ipfw_ops=
+{
+	NULL,
+	ipfw_forward_check,
+	ipfw_input_check,
+	ipfw_output_check,
+	NULL,
+	NULL
+};
+
+int ipfw_init_or_cleanup(int init)
+{
+	int ret = 0;
+	unsigned long flags;
+
+	FWC_WRITE_LOCK_IRQ(&ip_fw_lock, flags);
+
+	if (!init) goto cleanup;
+
+#ifdef DEBUG_IP_FIREWALL_LOCKING
+	fwc_wlocks = fwc_rlocks = 0;
+#endif
+
+#if defined(CONFIG_NETLINK_DEV) || defined(CONFIG_NETLINK_DEV_MODULE)
+	ipfwsk = netlink_kernel_create(NETLINK_FIREWALL, NULL);
+	if (ipfwsk == NULL)
+		goto cleanup_nothing;
+#endif
+
+	ret = register_firewall(PF_INET, &ipfw_ops);
+	if (ret < 0)
+		goto cleanup_netlink;
+
+	proc_net_create(IP_FW_PROC_CHAINS, S_IFREG | S_IRUSR | S_IWUSR, ip_chain_procinfo);
+	proc_net_create(IP_FW_PROC_CHAIN_NAMES, S_IFREG | S_IRUSR | S_IWUSR, ip_chain_name_procinfo);
+
+	IP_FW_INPUT_CHAIN = ip_init_chain(IP_FW_LABEL_INPUT, 1, FW_ACCEPT);
+	IP_FW_FORWARD_CHAIN = ip_init_chain(IP_FW_LABEL_FORWARD, 1, FW_ACCEPT);
+	IP_FW_OUTPUT_CHAIN = ip_init_chain(IP_FW_LABEL_OUTPUT, 1, FW_ACCEPT);
+
+	FWC_WRITE_UNLOCK_IRQ(&ip_fw_lock, flags);
+	return ret;
+
+ cleanup:
+	while (ip_fw_chains) {
+		struct ip_chain *next = ip_fw_chains->next;
+
+		clear_fw_chain(ip_fw_chains);
+		kfree(ip_fw_chains);
+		ip_fw_chains = next;
+	}
+
+	proc_net_remove(IP_FW_PROC_CHAINS);
+	proc_net_remove(IP_FW_PROC_CHAIN_NAMES);
+
+	unregister_firewall(PF_INET, &ipfw_ops);
+
+ cleanup_netlink:
+#if defined(CONFIG_NETLINK_DEV) || defined(CONFIG_NETLINK_DEV_MODULE)
+	sock_release(ipfwsk->socket);
+
+ cleanup_nothing:
+#endif
+	FWC_WRITE_UNLOCK_IRQ(&ip_fw_lock, flags);
+	return ret;
+}

FUNET's LINUX-ADM group, linux-adm@nic.funet.fi
TCL-scripts by Sam Shen (who was at: slshen@lbl.gov)