patch-2.4.8 linux/arch/ia64/lib/clear_page.S

Next file: linux/arch/ia64/lib/clear_user.S
Previous file: linux/arch/ia64/lib/checksum.c
Back to the patch index
Back to the overall index

diff -u --recursive --new-file v2.4.7/linux/arch/ia64/lib/clear_page.S linux/arch/ia64/lib/clear_page.S
@@ -1,8 +1,6 @@
 /*
  *
- * Optimized version of the standard clearpage() function
- *
- * Based on comments from ddd. Try not to overflow the write buffer.
+ * Optimized function to clear a page of memory.
  *
  * Inputs:
  *	in0:	address of page
@@ -13,27 +11,41 @@
  * Copyright (C) 1999-2001 Hewlett-Packard Co
  * Copyright (C) 1999 Stephane Eranian <eranian@hpl.hp.com>
  * Copyright (C) 1999-2001 David Mosberger-Tang <davidm@hpl.hp.com>
+ *
+ * 1/06/01 davidm	Tuned for Itanium.
  */
 #include <asm/asmmacro.h>
 #include <asm/page.h>
 
+#define saved_lc	r2	/* holds the caller's ar.lc while the loop runs */
+#define dst0		in0	/* first store pointer; aliases the page-address argument */
+#define dst1		r8	/* store pointer, initialised to dst0 + 16 */
+#define dst2		r9	/* store pointer, initialised to dst0 + 32 */
+#define dst3		r10	/* store pointer, initialised to dst0 + 48 */
+#define dst_fetch	r11	/* prefetch pointer used by lfetch, initialised to dst0 + 512 */
+
 GLOBAL_ENTRY(clear_page)
 	.prologue
-	alloc r11=ar.pfs,1,0,0,0
-	.save ar.lc, r16
-	mov r16=ar.lc		// slow
-
-	.body
-
-	mov r17=PAGE_SIZE/32-1	// -1 = repeat/until
+	.regstk 1,0,0,0			// same 1,0,0,0 frame shape the removed alloc declared (in0 only)
+	mov r16 = PAGE_SIZE/64-1	// -1 = repeat/until; each loop iteration advances every pointer by 64
 	;;
-	adds r18=16,in0
-	mov ar.lc=r17
+	.save ar.lc, saved_lc		// unwind annotation: ar.lc is kept in saved_lc
+	mov saved_lc = ar.lc		// preserve the caller's loop counter
+	.body
+	mov ar.lc = r16			// loop counter consumed by br.cloop below
+	adds dst1 = 16, dst0		// dst0..dst3 start at offsets 0, 16, 32, 48 ...
+	adds dst2 = 32, dst0		// ... so the four stores together cover one
+	adds dst3 = 48, dst0		// ... contiguous 64-byte span per iteration
+	adds dst_fetch = 512, dst0	// prefetch pointer starts 512 bytes ahead of the stores
 	;;
-1:	stf.spill.nta [in0]=f0,32
-	stf.spill.nta [r18]=f0,32
+1:	stf.spill.nta [dst0] = f0, 64	// store f0 (zero), then post-increment pointer by 64
+	stf.spill.nta [dst1] = f0, 64	// same at +16
+	stf.spill.nta [dst2] = f0, 64	// same at +32
+	stf.spill.nta [dst3] = f0, 64	// same at +48
+
+	lfetch [dst_fetch], 64		// prefetch ahead, advance by 64; NOTE(review): the last 8 iterations target addresses past the page end
 	br.cloop.dptk.few 1b
 	;;
-	mov ar.lc=r16		// restore lc
+	mov ar.lc = r2		// restore lc (r2 is the saved_lc alias; NOTE(review): could be spelled saved_lc)
 	br.ret.sptk.few rp
 END(clear_page)

FUNET's LINUX-ADM group, linux-adm@nic.funet.fi
TCL-scripts by Sam Shen (who was at: slshen@lbl.gov)