patch-2.4.21 linux-2.4.21/arch/sparc64/kernel/smp.c

diff -urN linux-2.4.20/arch/sparc64/kernel/smp.c linux-2.4.21/arch/sparc64/kernel/smp.c
@@ -18,6 +18,7 @@
 #include <linux/fs.h>
 #include <linux/seq_file.h>
 #include <linux/cache.h>
+#include <linux/timer.h>
 
 #include <asm/head.h>
 #include <asm/ptrace.h>
@@ -137,7 +138,6 @@
 void __init smp_callin(void)
 {
 	int cpuid = hard_smp_processor_id();
-	unsigned long pstate;
 	extern int bigkernel;
 	extern unsigned long kern_locked_tte_data;
 
@@ -155,50 +155,6 @@
 
 	cpu_probe();
 
-	/* Guarentee that the following sequences execute
-	 * uninterrupted.
-	 */
-	__asm__ __volatile__("rdpr	%%pstate, %0\n\t"
-			     "wrpr	%0, %1, %%pstate"
-			     : "=r" (pstate)
-			     : "i" (PSTATE_IE));
-
-	/* Set things up so user can access tick register for profiling
-	 * purposes.  Also workaround BB_ERRATA_1 by doing a dummy
-	 * read back of %tick after writing it.
-	 */
-	__asm__ __volatile__(
-		"sethi	%%hi(0x80000000), %%g1\n\t"
-		"ba,pt	%%xcc, 1f\n\t"
-		"sllx	%%g1, 32, %%g1\n\t"
-		".align	64\n"
-	"1:	rd	%%tick, %%g2\n\t"
-		"add	%%g2, 6, %%g2\n\t"
-		"andn	%%g2, %%g1, %%g2\n\t"
-		"wrpr	%%g2, 0, %%tick\n\t"
-		"rdpr	%%tick, %%g0"
-	: /* no outputs */
-	: /* no inputs */
-	: "g1", "g2");
-
-	if (SPARC64_USE_STICK) {
-		/* Let the user get at STICK too. */
-		__asm__ __volatile__(
-			"sethi	%%hi(0x80000000), %%g1\n\t"
-			"sllx	%%g1, 32, %%g1\n\t"
-			"rd	%%asr24, %%g2\n\t"
-			"andn	%%g2, %%g1, %%g2\n\t"
-			"wr	%%g2, 0, %%asr24"
-		: /* no outputs */
-		: /* no inputs */
-		: "g1", "g2");
-	}
-
-	/* Restore PSTATE_IE. */
-	__asm__ __volatile__("wrpr	%0, 0x0, %%pstate"
-			     : /* no outputs */
-			     : "r" (pstate));
-
 	smp_setup_percpu_timer();
 
 	__sti();
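
The open-coded %tick/%stick sequences deleted above (and the compare-register sequences deleted further down) move behind an operations vector that the rest of this patch calls through as tick_ops. The vector itself is defined outside this file, so the following is only a sketch reconstructed from the call sites in this diff; the struct name, field order, and exact prototypes are assumptions:

	/* Reconstructed from usage in smp.c; not the authoritative definition. */
	struct sparc64_tick_ops {
		void (*init_tick)(unsigned long offset);	/* smp_setup_percpu_timer()  */
		unsigned long (*get_tick)(void);		/* get_delta(), tick loop    */
		void (*add_tick)(unsigned long adj,
				 unsigned long offset);		/* tick sync client adjust   */
		unsigned long (*add_compare)(unsigned long offset); /* rearm compare reg    */
		unsigned long softint_mask;			/* level-14 softint bit      */
	};

	extern struct sparc64_tick_ops *tick_ops;

Each chip variant (pre-%stick Spitfire vs. %stick-equipped parts) supplies its own instance, which is what lets the SPARC64_USE_STICK conditionals below disappear.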
@@ -225,10 +181,6 @@
 extern int cpu_idle(void);
 extern void init_IRQ(void);
 
-void initialize_secondary(void)
-{
-}
-
 int start_secondary(void *unused)
 {
 	trap_init();
@@ -242,6 +194,158 @@
 	panic("SMP bolixed\n");
 }
 
+static unsigned long current_tick_offset;
+
+/* This tick register synchronization scheme is taken entirely from
+ * the ia64 port, see arch/ia64/kernel/smpboot.c for details and credit.
+ *
+ * The only change I've made is to rework it so that the master
+ * initiates the synchronization instead of the slave. -DaveM
+ */
+
+#define MASTER	0
+#define SLAVE	(SMP_CACHE_BYTES/sizeof(unsigned long))
+
+#define NUM_ROUNDS	64	/* magic value */
+#define NUM_ITERS	5	/* likewise */
+
+static spinlock_t itc_sync_lock = SPIN_LOCK_UNLOCKED;
+static unsigned long go[SLAVE + 1];
+
+#define DEBUG_TICK_SYNC	0
+
+static inline long get_delta (long *rt, long *master)
+{
+	unsigned long best_t0 = 0, best_t1 = ~0UL, best_tm = 0;
+	unsigned long tcenter, t0, t1, tm;
+	unsigned long i;
+
+	for (i = 0; i < NUM_ITERS; i++) {
+		t0 = tick_ops->get_tick();
+		go[MASTER] = 1;
+		membar("#StoreLoad");
+		while (!(tm = go[SLAVE]))
+			membar("#LoadLoad");
+		go[SLAVE] = 0;
+		membar("#StoreStore");
+		t1 = tick_ops->get_tick();
+
+		if (t1 - t0 < best_t1 - best_t0)
+			best_t0 = t0, best_t1 = t1, best_tm = tm;
+	}
+
+	*rt = best_t1 - best_t0;
+	*master = best_tm - best_t0;
+
+	/* average best_t0 and best_t1 without overflow: */
+	tcenter = (best_t0/2 + best_t1/2);
+	if (best_t0 % 2 + best_t1 % 2 == 2)
+		tcenter++;
+	return tcenter - best_tm;
+}
+
+void smp_synchronize_tick_client(void)
+{
+	long i, delta, adj, adjust_latency = 0, done = 0;
+	unsigned long flags, rt, master_time_stamp, bound;
+#if DEBUG_TICK_SYNC
+	struct {
+		long rt;	/* roundtrip time */
+		long master;	/* master's timestamp */
+		long diff;	/* difference between midpoint and master's timestamp */
+		long lat;	/* estimate of itc adjustment latency */
+	} t[NUM_ROUNDS];
+#endif
+
+	go[MASTER] = 1;
+
+	while (go[MASTER])
+		membar("#LoadLoad");
+
+	local_irq_save(flags);
+	{
+		for (i = 0; i < NUM_ROUNDS; i++) {
+			delta = get_delta(&rt, &master_time_stamp);
+			if (delta == 0) {
+				done = 1;	/* let's lock on to this... */
+				bound = rt;
+			}
+
+			if (!done) {
+				if (i > 0) {
+					adjust_latency += -delta;
+					adj = -delta + adjust_latency/4;
+				} else
+					adj = -delta;
+
+				tick_ops->add_tick(adj, current_tick_offset);
+			}
+#if DEBUG_TICK_SYNC
+			t[i].rt = rt;
+			t[i].master = master_time_stamp;
+			t[i].diff = delta;
+			t[i].lat = adjust_latency/4;
+#endif
+		}
+	}
+	local_irq_restore(flags);
+
+#if DEBUG_TICK_SYNC
+	for (i = 0; i < NUM_ROUNDS; i++)
+		printk("rt=%5ld master=%5ld diff=%5ld adjlat=%5ld\n",
+		       t[i].rt, t[i].master, t[i].diff, t[i].lat);
+#endif
+
+	printk(KERN_INFO "CPU %d: synchronized TICK with master CPU (last diff %ld cycles, "
+	       "maxerr %lu cycles)\n", smp_processor_id(), delta, rt);
+}
+
+static void smp_start_sync_tick_client(int cpu);
+
+static void smp_synchronize_one_tick(int cpu)
+{
+	unsigned long flags, i;
+
+	go[MASTER] = 0;
+
+	smp_start_sync_tick_client(cpu);
+
+	/* wait for client to be ready */
+	while (!go[MASTER])
+		membar("#LoadLoad");
+
+	/* now let the client proceed into its loop */
+	go[MASTER] = 0;
+	membar("#StoreLoad");
+
+	spin_lock_irqsave(&itc_sync_lock, flags);
+	{
+		for (i = 0; i < NUM_ROUNDS*NUM_ITERS; i++) {
+			while (!go[MASTER])
+				membar("#LoadLoad");
+			go[MASTER] = 0;
+			membar("#StoreStore");
+			go[SLAVE] = tick_ops->get_tick();
+			membar("#StoreLoad");
+		}
+	}
+	spin_unlock_irqrestore(&itc_sync_lock, flags);
+}
+
+static void smp_synchronize_tick(void)
+{
+	int cpu = smp_processor_id();
+	int i;
+
+	for (i = 0; i < NR_CPUS; i++) {
+		if (cpu_present_map & (1UL << i)) {
+			if (i == cpu)
+				continue;
+			smp_synchronize_one_tick(i);
+		}
+	}
+}
+
 extern struct prom_cpuinfo linux_cpus[64];
 
 extern unsigned long sparc64_cpu_startup;
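
The heart of the scheme is get_delta() above: the slave brackets the master's timestamp tm between two local reads t0 and t1, keeps the iteration with the smallest round trip, and then assumes the master's read landed at the midpoint of [t0, t1]. A self-contained illustration of that arithmetic with made-up cycle counts (the numbers are purely illustrative):

	#include <stdio.h>

	int main(void)
	{
		unsigned long t0 = 1000;	/* slave tick before the handshake */
		unsigned long tm = 1107;	/* master tick, as handed back     */
		unsigned long t1 = 1010;	/* slave tick after the handshake  */

		/* Overflow-free average of t0 and t1, exactly as in get_delta(). */
		unsigned long tcenter = t0/2 + t1/2;
		if (t0 % 2 + t1 % 2 == 2)
			tcenter++;

		/* Negative delta: the slave runs behind, so the client adds -delta. */
		long delta = (long)(tcenter - tm);	/* 1005 - 1107 = -102 */

		printf("rt=%lu master=%lu delta=%ld adj=%ld\n",
		       t1 - t0, tm - t0, delta, -delta);
		return 0;
	}

This prints rt=10 master=107 delta=-102 adj=102: the slave's tick reads 1005 at the moment the master's reads 1107, so smp_synchronize_tick_client() feeds +102 into tick_ops->add_tick().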
@@ -340,6 +444,8 @@
 	}
 	smp_processors_ready = 1;
 	membar("#StoreStore | #StoreLoad");
+
+	smp_synchronize_tick();
 }
 
 static void spitfire_xcall_helper(u64 data0, u64 data1, u64 data2, u64 pstate, unsigned long cpu)
@@ -542,6 +648,15 @@
 	}
 }
 
+extern unsigned long xcall_sync_tick;
+
+static void smp_start_sync_tick_client(int cpu)
+{
+	smp_cross_call_masked(&xcall_sync_tick,
+			      0, 0, 0,
+			      (1UL << cpu));
+}
+
 /* Send cross call to all processors except self. */
 #define smp_cross_call(func, ctx, data1, data2) \
 	smp_cross_call_masked(func, ctx, data1, data2, cpu_present_map)
@@ -987,8 +1102,6 @@
 
 extern void sparc64_do_profile(unsigned long pc, unsigned long o7);
 
-static unsigned long current_tick_offset;
-
 #define prof_multiplier(__cpu)		cpu_data[(__cpu)].multiplier
 #define prof_counter(__cpu)		cpu_data[(__cpu)].counter
 
@@ -1002,12 +1115,7 @@
 	 * Check for level 14 softint.
 	 */
 	{
-		unsigned long tick_mask;
-
-		if (SPARC64_USE_STICK)
-			tick_mask = (1UL << 16);
-		else
-			tick_mask = (1UL << 0);
+		unsigned long tick_mask = tick_ops->softint_mask;
 
 		if (!(get_softint() & tick_mask)) {
 			extern void handler_irq(int, struct pt_regs *);
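
The SPARC64_USE_STICK branch deleted here turns into plain data: whichever tick_ops instance the machine selected carries the right softint bit. A sketch of how the two instances would seed that field, using the vector sketched earlier (the instance names are hypothetical; the bit values are exactly the ones the removed branch picked):

	/* Hypothetical per-chip instances; only the mask field is shown. */
	static struct sparc64_tick_ops tick_operations = {
		/* ... %tick/%tick_cmpr methods ... */
		.softint_mask	= 1UL << 0,	/* pre-%stick Spitfire parts */
	};

	static struct sparc64_tick_ops stick_operations = {
		/* ... %stick/%asr25 methods ... */
		.softint_mask	= 1UL << 16,	/* %stick-equipped parts */
	};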
@@ -1022,17 +1130,17 @@
 		if (!user)
 			sparc64_do_profile(regs->tpc, regs->u_regs[UREG_RETPC]);
 		if (!--prof_counter(cpu)) {
-			if (cpu == boot_cpu_id) {
-				irq_enter(cpu, 0);
+			irq_enter(cpu, 0);
 
+			if (cpu == boot_cpu_id) {
 				kstat.irqs[cpu][0]++;
 				timer_tick_interrupt(regs);
-
-				irq_exit(cpu, 0);
 			}
 
 			update_process_times(user);
 
+			irq_exit(cpu, 0);
+
 			prof_counter(cpu) = prof_multiplier(cpu);
 		}
 
@@ -1044,47 +1152,14 @@
 				     : "=r" (pstate)
 				     : "i" (PSTATE_IE));
 
-		/* Workaround for Spitfire Errata (#54 I think??), I discovered
-		 * this via Sun BugID 4008234, mentioned in Solaris-2.5.1 patch
-		 * number 103640.
-		 *
-		 * On Blackbird writes to %tick_cmpr can fail, the
-		 * workaround seems to be to execute the wr instruction
-		 * at the start of an I-cache line, and perform a dummy
-		 * read back from %tick_cmpr right after writing to it. -DaveM
-		 *
-		 * Just to be anal we add a workaround for Spitfire
-		 * Errata 50 by preventing pipeline bypasses on the
-		 * final read of the %tick register into a compare
-		 * instruction.  The Errata 50 description states
-		 * that %tick is not prone to this bug, but I am not
-		 * taking any chances.
-		 */
-		if (!SPARC64_USE_STICK) {
-		__asm__ __volatile__("rd	%%tick_cmpr, %0\n\t"
-				     "ba,pt	%%xcc, 1f\n\t"
-				     " add	%0, %2, %0\n\t"
-				     ".align	64\n"
-				  "1: wr	%0, 0x0, %%tick_cmpr\n\t"
-				     "rd	%%tick_cmpr, %%g0\n\t"
-				     "rd	%%tick, %1\n\t"
-				     "mov	%1, %1"
-				     : "=&r" (compare), "=r" (tick)
-				     : "r" (current_tick_offset));
-		} else {
-		__asm__ __volatile__("rd	%%asr25, %0\n\t"
-				     "add	%0, %2, %0\n\t"
-				     "wr	%0, 0x0, %%asr25\n\t"
-				     "rd	%%asr24, %1\n\t"
-				     : "=&r" (compare), "=r" (tick)
-				     : "r" (current_tick_offset));
-		}
+		compare = tick_ops->add_compare(current_tick_offset);
+		tick = tick_ops->get_tick();
 
 		/* Restore PSTATE_IE. */
 		__asm__ __volatile__("wrpr	%0, 0x0, %%pstate"
 				     : /* no outputs */
 				     : "r" (pstate));
-	} while (tick >= compare);
+	} while (time_after_eq(tick, compare));
 }
 
 static void __init smp_setup_percpu_timer(void)
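
The rewritten loop condition above is what the new #include <linux/timer.h> at the top of the patch is for: time_after_eq() compares through a signed difference, so the retry test stays correct across tick wraparound, where a plain `tick >= compare` would not. The 2.4 definition is the usual jiffies idiom:

	#define time_after_eq(a,b)	((long)(a) - (long)(b) >= 0)

For example, just after a wrap tick may read 0x2 while compare is still 0xfffffffffffffff0; the signed subtraction is positive, so the loop correctly rearms the compare register once more instead of exiting with a stale deadline.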
@@ -1102,35 +1177,7 @@
 			     : "=r" (pstate)
 			     : "i" (PSTATE_IE));
 
-	/* Workaround for Spitfire Errata (#54 I think??), I discovered
-	 * this via Sun BugID 4008234, mentioned in Solaris-2.5.1 patch
-	 * number 103640.
-	 *
-	 * On Blackbird writes to %tick_cmpr can fail, the
-	 * workaround seems to be to execute the wr instruction
-	 * at the start of an I-cache line, and perform a dummy
-	 * read back from %tick_cmpr right after writing to it. -DaveM
-	 */
-	if (!SPARC64_USE_STICK) {
-	__asm__ __volatile__(
-		"rd	%%tick, %%g1\n\t"
-		"ba,pt	%%xcc, 1f\n\t"
-		" add	%%g1, %0, %%g1\n\t"
-		".align	64\n"
-	"1:	wr	%%g1, 0x0, %%tick_cmpr\n\t"
-		"rd	%%tick_cmpr, %%g0"
-	: /* no outputs */
-	: "r" (current_tick_offset)
-	: "g1");
-	} else {
-	__asm__ __volatile__(
-		"rd	%%asr24, %%g1\n\t"
-		"add	%%g1, %0, %%g1\n\t"
-		"wr	%%g1, 0x0, %%asr25"
-	: /* no outputs */
-	: "r" (current_tick_offset)
-	: "g1");
-	}
+	tick_ops->init_tick(current_tick_offset);
 
 	/* Restore PSTATE_IE. */
 	__asm__ __volatile__("wrpr	%0, 0x0, %%pstate"

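With that, smp_setup_percpu_timer() no longer needs to know which timer the chip has. The %tick flavor of init_tick presumably looks like the asm deleted above wrapped in a function; a sketch under that assumption (the function name is invented, the body is the removed sequence verbatim, Blackbird write/read-back workaround included):

	static void tick_init_tick(unsigned long offset)
	{
		__asm__ __volatile__(
			"rd	%%tick, %%g1\n\t"
			"ba,pt	%%xcc, 1f\n\t"
			" add	%%g1, %0, %%g1\n\t"
			".align	64\n"
		"1:	wr	%%g1, 0x0, %%tick_cmpr\n\t"
			"rd	%%tick_cmpr, %%g0"
		: /* no outputs */
		: "r" (offset)
		: "g1");
	}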