patch-2.3.43 linux/kernel/softirq.c

diff -u --recursive --new-file v2.3.42/linux/kernel/softirq.c linux/kernel/softirq.c
@@ -3,68 +3,271 @@
  *
  *	Copyright (C) 1992 Linus Torvalds
  *
- * do_bottom_half() runs at normal kernel priority: all interrupts
- * enabled.  do_bottom_half() is atomic with respect to itself: a
- * bottom_half handler need not be re-entrant.
- *
  * Fixed a disable_bh()/enable_bh() race (was causing a console lockup)
  * due to non-atomic handling of bh_mask_count. Copyright (C) 1998  Andrea Arcangeli
+ *
+ * Rewritten. Old one was good in 2.2, but in 2.3 it was immoral. --ANK (990903)
  */
 
 #include <linux/mm.h>
 #include <linux/kernel_stat.h>
 #include <linux/interrupt.h>
 #include <linux/smp_lock.h>
+#include <linux/init.h>
 
-#include <asm/io.h>
+/*
+   - No shared variables; all data is CPU-local.
+   - If a softirq needs serialization, let it serialize itself
+     with its own spinlocks.
+   - Even if a softirq is serialized, only the local CPU is marked for
+     execution, so we get a weak sort of CPU binding. It is still not
+     clear whether this will result in better locality or not.
+   - These softirqs are not masked by the global cli() and
+     start_bh_atomic() (for obvious reasons). Hence, old code that still
+     uses global locks MUST NOT use softirqs directly, but must insert
+     interfacing routines that acquire the global locks. See the BH
+     implementation below for an example.
 
-/* intr_count died a painless death... -DaveM */
+   Examples:
+   - NET RX softirq. It is multithreaded and does not require
+     any global serialization.
+   - NET TX softirq. It kicks the software netdevice queues, so it is
+     logically serialized per device, but that serialization is
+     invisible to the common code.
+   - Tasklets: each tasklet is serialized with respect to itself.
+   - Bottom halves: globally serialized, grr...
+ */
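
To make the per-CPU model described above concrete, the following editorial sketch (not part of the patch) shows how a subsystem might plug into the new interface. The softirq number MY_SOFTIRQ and the my_* functions are hypothetical; open_softirq() and __cpu_raise_softirq() are the interfaces introduced further down in this file and in <linux/interrupt.h>.

#include <linux/interrupt.h>	/* struct softirq_action, open_softirq(), ... */
#include <linux/smp.h>		/* smp_processor_id() */
#include <linux/init.h>

#define MY_SOFTIRQ 4		/* hypothetical: assumes softirq slot 4 is unused */

/* Runs from do_softirq() on whichever CPU raised it; any data shared
 * across CPUs must be protected by the softirq's own spinlocks. */
static void my_softirq_action(struct softirq_action *h)
{
	/* h->data is whatever was passed as the third argument to open_softirq() */
}

static void __init my_softirq_setup(void)
{
	open_softirq(MY_SOFTIRQ, my_softirq_action, NULL);
}

/* Called from a hardware interrupt handler: marks the softirq pending on
 * the local CPU only; it runs when this CPU next calls do_softirq(). */
static void my_irq_handler_fragment(void)
{
	__cpu_raise_softirq(smp_processor_id(), MY_SOFTIRQ);
}
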
 
-atomic_t bh_mask_count[32];
-unsigned long bh_active = 0;
-unsigned long bh_mask = 0;
-void (*bh_base[32])(void);
 
-/*
- * This needs to make sure that only one bottom half handler
- * is ever active at a time. We do this without locking by
- * doing an atomic increment on the intr_count, and checking
- * (nonatomically) against 1. Only if it's 1 do we schedule
- * the bottom half.
- *
- * Note that the non-atomicity of the test (as opposed to the
- * actual update) means that the test may fail, and _nobody_
- * runs the handlers if there is a race that makes multiple
- * CPU's get here at the same time. That's ok, we'll run them
- * next time around.
- */
-static inline void run_bottom_halves(void)
+struct softirq_state softirq_state[NR_CPUS];
+static struct softirq_action softirq_vec[32];
+
+asmlinkage void do_softirq()
+{
+	int cpu = smp_processor_id();
+	__u32 active, mask;
+
+	if (in_interrupt())
+		return;
+
+	local_bh_disable();
+
+	local_irq_disable();
+	mask = softirq_state[cpu].mask;
+	active = softirq_state[cpu].active & mask;
+
+	if (active) {
+		struct softirq_action *h;
+
+restart:
+		/* Reset active bitmask before enabling irqs */
+		softirq_state[cpu].active &= ~active;
+
+		local_irq_enable();
+
+		h = softirq_vec;
+		mask &= ~active;
+
+		do {
+			if (active & 1)
+				h->action(h);
+			h++;
+			active >>= 1;
+		} while (active);
+
+		local_irq_disable();
+
+		active = softirq_state[cpu].active;
+		if ((active &= mask) != 0)
+			goto retry;
+	}
+
+	local_bh_enable();
+
+	/* Leave with hard irqs locally disabled. This is critical to
+	 * close the window for infinite recursion: while we held the
+	 * local bh count it protected us, but now we are defenceless.
+	 */
+	return;
+
+retry:
+	goto restart;
+}
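
The comment above about returning with hard irqs disabled is easiest to see from the caller's side. The sketch below is an editorial illustration only: the helper name is invented, and the real call sites are the per-architecture interrupt-exit paths (do_IRQ() and friends).

#include <linux/interrupt.h>	/* softirq_state[], do_softirq() */

/* Hypothetical interrupt-exit hook, for illustration only. */
static inline void irq_exit_check_softirq(int cpu)
{
	/* Hard irqs are disabled here and stay disabled across the call:
	 * do_softirq() deliberately does not re-enable them. */
	if (softirq_state[cpu].active & softirq_state[cpu].mask)
		do_softirq();

	/* Only when the interrupt-return code restores the saved flags can
	 * another interrupt arrive and re-enter this path, so softirq
	 * handling cannot recurse without bound. */
}
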
+
+
+static spinlock_t softirq_mask_lock = SPIN_LOCK_UNLOCKED;
+
+void open_softirq(int nr, void (*action)(struct softirq_action*), void *data)
+{
+	unsigned long flags;
+	int i;
+
+	spin_lock_irqsave(&softirq_mask_lock, flags);
+	softirq_vec[nr].data = data;
+	softirq_vec[nr].action = action;
+
+	for (i=0; i<NR_CPUS; i++)
+		softirq_state[i].mask |= (1<<nr);
+	spin_unlock_irqrestore(&softirq_mask_lock, flags);
+}
+
+
+/* Tasklets */
+
+struct tasklet_head tasklet_vec[NR_CPUS] __cacheline_aligned;
+
+static void tasklet_action(struct softirq_action *a)
 {
-	unsigned long active;
-	void (**bh)(void);
+	int cpu = smp_processor_id();
+	struct tasklet_struct *list;
+
+	local_irq_disable();
+	list = tasklet_vec[cpu].list;
+	tasklet_vec[cpu].list = NULL;
+	local_irq_enable();
+
+	while (list != NULL) {
+		struct tasklet_struct *t = list;
 
-	active = get_active_bhs();
-	clear_active_bhs(active);
-	bh = bh_base;
-	do {
-		if (active & 1)
-			(*bh)();
-		bh++;
-		active >>= 1;
-	} while (active);
+		list = list->next;
+
+		if (tasklet_trylock(t)) {
+			if (atomic_read(&t->count) == 0) {
+				clear_bit(TASKLET_STATE_SCHED, &t->state);
+
+				t->func(t->data);
+				tasklet_unlock(t);
+				continue;
+			}
+			tasklet_unlock(t);
+		}
+		local_irq_disable();
+		t->next = tasklet_vec[cpu].list;
+		tasklet_vec[cpu].list = t;
+		__cpu_raise_softirq(cpu, TASKLET_SOFTIRQ);
+		local_irq_enable();
+	}
 }
 
-asmlinkage void do_bottom_half(void)
+
+
+struct tasklet_head tasklet_hi_vec[NR_CPUS] __cacheline_aligned;
+
+static void tasklet_hi_action(struct softirq_action *a)
 {
 	int cpu = smp_processor_id();
+	struct tasklet_struct *list;
+
+	local_irq_disable();
+	list = tasklet_hi_vec[cpu].list;
+	tasklet_hi_vec[cpu].list = NULL;
+	local_irq_enable();
+
+	while (list != NULL) {
+		struct tasklet_struct *t = list;
+
+		list = list->next;
+
+		if (tasklet_trylock(t)) {
+			if (atomic_read(&t->count) == 0) {
+				clear_bit(TASKLET_STATE_SCHED, &t->state);
 
-	if (softirq_trylock(cpu)) {
-		if (hardirq_trylock(cpu)) {
-			__sti();
-			run_bottom_halves();
-			__cli();
-			hardirq_endlock(cpu);
+				t->func(t->data);
+				tasklet_unlock(t);
+				continue;
+			}
+			tasklet_unlock(t);
 		}
-		softirq_endlock(cpu);
+		local_irq_disable();
+		t->next = tasklet_hi_vec[cpu].list;
+		tasklet_hi_vec[cpu].list = t;
+		__cpu_raise_softirq(cpu, HI_SOFTIRQ);
+		local_irq_enable();
+	}
+}
+
+
+void tasklet_init(struct tasklet_struct *t,
+		  void (*func)(unsigned long), unsigned long data)
+{
+	t->func = func;
+	t->data = data;
+	t->state = 0;
+	atomic_set(&t->count, 0);
+}
+
+void tasklet_kill(struct tasklet_struct *t)
+{
+	while (test_and_set_bit(TASKLET_STATE_SCHED, &t->state)) {
+		if (in_interrupt())
+			panic("Attempt to kill tasklet from interrupt\n");
+		schedule();
 	}
+	tasklet_unlock_wait(t);
+}
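
As a usage note (editorial, not part of the patch): a driver typically pairs the two functions above with tasklet_schedule() from <linux/interrupt.h>, which queues the tasklet on the local CPU's list and raises TASKLET_SOFTIRQ. All my_* names below are hypothetical.

#include <linux/interrupt.h>

static void my_tasklet_func(unsigned long data)
{
	/* Runs in softirq context. This particular tasklet never runs on
	 * two CPUs at once, but different tasklets may run concurrently. */
}

static struct tasklet_struct my_tasklet;

static void my_driver_setup(void)
{
	tasklet_init(&my_tasklet, my_tasklet_func, 0 /* data */);
}

static void my_interrupt_handler(void)
{
	tasklet_schedule(&my_tasklet);	/* cheap; safe from irq context */
}

static void my_driver_teardown(void)
{
	tasklet_kill(&my_tasklet);	/* wait for any scheduled run to finish */
}
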
+
+
+
+/* Old style BHs */
+
+static void (*bh_base[32])(void);
+struct tasklet_struct bh_task_vec[32];
+
+/* BHs are serialized by the spinlock global_bh_lock.
+
+   It would still be possible to implement synchronize_bh() as
+   spin_unlock_wait(&global_bh_lock). The kernel does not use that
+   operation at the moment, so the only reason this lock is not
+   private is wait_on_irq().
+
+   The lock can be removed only after all the BHs have been audited.
+ */
+spinlock_t global_bh_lock = SPIN_LOCK_UNLOCKED;
+
+static void bh_action(unsigned long nr)
+{
+	int cpu = smp_processor_id();
+
+	if (!spin_trylock(&global_bh_lock))
+		goto resched;
+
+	if (!hardirq_trylock(cpu))
+		goto resched_unlock;
+
+	if (bh_base[nr])
+		bh_base[nr]();
+
+	hardirq_endlock(cpu);
+	spin_unlock(&global_bh_lock);
+	return;
+
+resched_unlock:
+	spin_unlock(&global_bh_lock);
+resched:
+	mark_bh(nr);
+}
+
+void init_bh(int nr, void (*routine)(void))
+{
+	bh_base[nr] = routine;
+	mb();
+}
+
+void remove_bh(int nr)
+{
+	tasklet_kill(bh_task_vec+nr);
+	bh_base[nr] = NULL;
 }
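
For comparison with the tasklet interface, here is a rough editorial sketch of the legacy BH usage that init_bh()/remove_bh() keep working. MY_BH and the my_* functions are hypothetical; real users take a slot from the bh enumeration in <linux/interrupt.h> and raise it with mark_bh().

#include <linux/interrupt.h>

#define MY_BH 20	/* hypothetical slot; real code uses an enum entry such as CONSOLE_BH */

static void my_bh_routine(void)
{
	/* Runs under global_bh_lock: at most one BH of any kind executes
	 * at a time, across all CPUs. */
}

static void my_old_style_setup(void)
{
	init_bh(MY_BH, my_bh_routine);
}

static void my_old_style_irq_handler(void)
{
	mark_bh(MY_BH);		/* bh_action(MY_BH) will run the routine later */
}

static void my_old_style_teardown(void)
{
	remove_bh(MY_BH);	/* calls tasklet_kill() on bh_task_vec[MY_BH] first */
}
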
+
+void __init softirq_init()
+{
+	int i;
+
+	for (i=0; i<32; i++)
+		tasklet_init(bh_task_vec+i, bh_action, i);
+
+	open_softirq(TASKLET_SOFTIRQ, tasklet_action, NULL);
+	open_softirq(HI_SOFTIRQ, tasklet_hi_action, NULL);
+}
+
+
