patch-2.4.23 linux-2.4.23/arch/ia64/kernel/entry.S

diff -urN linux-2.4.22/arch/ia64/kernel/entry.S linux-2.4.23/arch/ia64/kernel/entry.S
@@ -3,6 +3,9 @@
  *
  * Kernel entry points.
  *
+ * Copyright (C) 2002-2003 
+ * 	Suresh Siddha <suresh.b.siddha@intel.com> 
+ *	Fenghua Yu <fenghua.yu@intel.com>
  * Copyright (C) 1998-2002 Hewlett-Packard Co
  *	David Mosberger-Tang <davidm@hpl.hp.com>
  * Copyright (C) 1999 VA Linux Systems
@@ -48,7 +51,10 @@
 ENTRY(ia64_execve)
 	.prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(3)
 	alloc loc1=ar.pfs,3,2,4,0
-	mov loc0=rp
+	/* Leave the kernel and restore all pt_regs to their corresponding registers. This is
+	 * special because an ia32 application needs its scratch registers after returning from execve.
+	 */
+	movl loc0=ia64_ret_from_execve_syscall 
 	.body
 	mov out0=in0			// filename
 	;;				// stop bit between alloc and call
@@ -72,19 +78,18 @@
 	 * this executes in less than 20 cycles even on Itanium, so it's not worth
 	 * optimizing for...).
 	 */
-	mov r4=0;		mov f2=f0;		mov b1=r0
-	mov r5=0;		mov f3=f0;		mov b2=r0
-	mov r6=0;		mov f4=f0;		mov b3=r0
-	mov r7=0;		mov f5=f0;		mov b4=r0
-	mov ar.unat=0;		mov f10=f0;		mov b5=r0
-	ldf.fill f11=[sp];	ldf.fill f12=[sp];	mov f13=f0
-	ldf.fill f14=[sp];	ldf.fill f15=[sp];	mov f16=f0
-	ldf.fill f17=[sp];	ldf.fill f18=[sp];	mov f19=f0
-	ldf.fill f20=[sp];	ldf.fill f21=[sp];	mov f22=f0
-	ldf.fill f23=[sp];	ldf.fill f24=[sp];	mov f25=f0
-	ldf.fill f26=[sp];	ldf.fill f27=[sp];	mov f28=f0
-	ldf.fill f29=[sp];	ldf.fill f30=[sp];	mov f31=f0
-	mov ar.lc=0
+	mov ar.unat=0; 		mov ar.lc=0;
+	mov r4=0;               mov f2=f0;              mov b1=r0
+	mov r5=0;               mov f3=f0;              mov b2=r0
+	mov r6=0;               mov f4=f0;              mov b3=r0
+	mov r7=0;               mov f5=f0;              mov b4=r0
+	ldf.fill f12=[sp];      mov f13=f0;             mov b5=r0
+	ldf.fill f14=[sp];      ldf.fill f15=[sp];      mov f16=f0
+	ldf.fill f17=[sp];      ldf.fill f18=[sp];      mov f19=f0
+	ldf.fill f20=[sp];      ldf.fill f21=[sp];      mov f22=f0
+	ldf.fill f23=[sp];      ldf.fill f24=[sp];      mov f25=f0
+	ldf.fill f26=[sp];      ldf.fill f27=[sp];      mov f28=f0
+	ldf.fill f29=[sp];      ldf.fill f30=[sp];      mov f31=f0
 	br.ret.sptk.many rp
 END(ia64_execve)
 
@@ -251,8 +256,6 @@
 	st8 [r14]=r21,16	// save b0
 	st8 [r15]=r22,16	// save b1
 	mov r25=b4
-	stf.spill [r2]=f10,32
-	stf.spill [r3]=f11,32
 	mov r26=b5
 	;;
 	st8 [r14]=r23,16	// save b2
@@ -351,9 +354,6 @@
 	ldf.fill f4=[r14],32
 	ldf.fill f5=[r15],32
 	;;
-	ldf.fill f10=[r14],32
-	ldf.fill f11=[r15],32
-	;;
 	ldf.fill f12=[r14],32
 	ldf.fill f13=[r15],32
 	;;
@@ -475,7 +475,7 @@
 .mem.offset 8,0;	st8.spill [r3]=r10	// clear error indication in slot for r10
 ia64_strace_leave_kernel:
 	br.call.sptk.many rp=invoke_syscall_trace // give parent a chance to catch return value
-.rety:	br.cond.sptk ia64_leave_kernel
+.rety:	br.cond.sptk ia64_leave_syscall
 
 strace_error:
 	ld8 r3=[r2]				// load pt_regs.r8
@@ -528,12 +528,186 @@
 (p7)	br.cond.spnt handle_syscall_error	// handle potential syscall failure
 END(ia64_ret_from_syscall)
 	// fall through
+/*
+ * ia64_leave_syscall(): Same as ia64_leave_kernel, except that it doesn't
+ *	need to switch to bank 0 and doesn't restore the scratch registers.
+ *	To avoid leaking kernel bits, the scratch registers are set to
+ *	the following known-to-be-safe values:
+ *
+ *		  r1: restored (global pointer)
+ *		  r2: cleared
+ *		  r3: cleared
+ *	      r8-r11: restored (syscall return value(s))
+ *		 r12: restored (user-level stack pointer)
+ *		 r13: restored (user-level thread pointer)
+ *		 r14: cleared
+ *		 r15: restored (syscall #)
+ *	     r16-r19: cleared
+ *		 r20: user-level ar.fpsr
+ *		 r21: user-level b0
+ *		 r22: cleared
+ *		 r23: user-level ar.bspstore
+ *		 r24: user-level ar.rnat
+ *		 r25: user-level ar.unat
+ *		 r26: user-level ar.pfs
+ *		 r27: user-level ar.rsc
+ *		 r28: user-level ip
+ *		 r29: user-level psr
+ *		 r30: user-level cfm
+ *		 r31: user-level pr
+ *	      f6-f11: cleared
+ *		  pr: restored (user-level pr)
+ *		  b0: restored (user-level rp)
+ *	          b6: cleared
+ *		  b7: cleared
+ *	     ar.unat: restored (user-level ar.unat)
+ *	      ar.pfs: restored (user-level ar.pfs)
+ *	      ar.rsc: restored (user-level ar.rsc)
+ *	     ar.rnat: restored (user-level ar.rnat)
+ *	 ar.bspstore: restored (user-level ar.bspstore)
+ *	     ar.fpsr: restored (user-level ar.fpsr)
+ *	      ar.ccv: cleared
+ *	      ar.csd: cleared
+ *	      ar.ssd: cleared
+ */
+GLOBAL_ENTRY(ia64_leave_syscall)
+	PT_REGS_UNWIND_INFO(0)
+	lfetch.fault [sp]
+	movl r14=.restart1
+	;;
+	mov.ret.sptk rp=r14,.restart1
+	cmp.eq pLvSys,p0=r0,r0			// pLvSys=1: leave from syscall
+.restart1:
+	// need_resched and signals atomic test
+(pUser)	rsm psr.i
+	adds r17=IA64_TASK_NEED_RESCHED_OFFSET,r13
+	adds r18=IA64_TASK_SIGPENDING_OFFSET,r13
+#ifdef CONFIG_PERFMON
+	adds r19=IA64_TASK_PFM_OVFL_BLOCK_RESET_OFFSET,r13
+#endif
+	;;
+#ifdef CONFIG_PERFMON
+(pUser)	ld8 r19=[r19]				// load current->thread.pfm_ovfl_block_reset
+#endif
+(pUser)	ld8 r17=[r17]				// load current->need_resched
+(pUser)	ld4 r18=[r18]				// load current->sigpending
+	;;
+#ifdef CONFIG_PERFMON
+(pUser)	cmp.ne.unc p9,p0=r19,r0			// current->thread.pfm_ovfl_block_reset != 0?
+#endif
+(pUser)	cmp.ne.unc p7,p0=r17,r0			// current->need_resched != 0?
+(pUser)	cmp.ne.unc p8,p0=r18,r0			// current->sigpending != 0?
+	;;
+#ifdef CONFIG_PERFMON
+(p9)	br.call.spnt.many b7=pfm_ovfl_block_reset
+#endif
+#if __GNUC__ < 3
+(p7)	br.call.spnt.many b7=invoke_schedule
+#else
+(p7)	br.call.spnt.many b7=schedule
+#endif
+(p8)	br.call.spnt.many rp=handle_signal_delivery	// check & deliver pending signals (once)
+
+	mov  ar.csd=r0
+	mov  ar.ssd=r0
+	adds r16=PT(LOADRS)+16,r12
+	adds r17=PT(AR_BSPSTORE)+16, r12
+	mov  f6=f0		// clear f6
+	;;
+	ld8 r19=[r16],PT(R8)-PT(LOADRS)        		// load ar.rsc value for "loadrs"
+	ld8 r23=[r17],PT(R9)-PT(AR_BSPSTORE)	// load ar.bspstore (may be garbage)
+	mov r22=r0		// clear r22
+	;;
+	// start restoring the state saved on the kernel stack (struct pt_regs):
+	ld8.fill r8=[r16],16
+	ld8.fill r9=[r17],16
+	mov  f7=f0		// clear f7
+	;;
+	ld8.fill r10=[r16],16
+	ld8.fill r11=[r17],16
+	mov  f8=f0		// clear f8
+	;;
+	ld8 r29=[r16],16	// load cr.ipsr
+	ld8 r28=[r17],16	// load cr.iip
+	mov b7=r0		// clear b7
+	;;
+	ld8 r30=[r16],16	// load cr.ifs
+	ld8 r25=[r17],16	// load ar.unat
+	cmp.eq p9,p0=r0,r0	// set p9 to indicate that we should restore cr.ifs
+	;;
+	rsm psr.i | psr.ic	// initiate turning off of interrupt and interruption collection
+	invala			// invalidate ALAT
+	mov  f9=f0		// clear f9
+	;;
+	ld8 r26=[r16],16 	// load ar.pfs
+	ld8 r27=[r17],PT(PR)-PT(AR_RSC)// load ar.rsc
+	mov  f10=f0		// clear f10
+	;;
+	ld8 r24=[r16],PT(B0)-PT(AR_RNAT)// load ar.rnat (may be garbage)
+	ld8 r31=[r17],PT(R1)-PT(PR)	    // load predicates
+	mov  f11=f0		// clear f11
+	;;
+	ld8 r21=[r16],PT(R12)-PT(B0)// load b0
+	ld8.fill r1=[r17],16	// load r1
+	mov r3=r0		// clear r3
+	;;
+	ld8.fill r12=[r16],16
+	ld8.fill r13=[r17],16
+	mov r2=r0		// clear r2
+	;;
+	ld8 r20=[r16]		// ar.fpsr
+	ld8.fill r15=[r17]	// load r15
+	adds r18=16,r16
+	;;
+	mov r16=ar.bsp		// get existing backing store pointer
+	movl r17=PERCPU_ADDR+IA64_CPU_PHYS_STACKED_SIZE_P8_OFFSET
+	srlz.i			// ensure interruption collection is off
+	mov ar.ccv=r0		// clear ar.ccv
+	mov b6=r0		// clear b6
+	;;
+	ld4 r17=[r17]		// r17 = cpu_data->phys_stacked_size_p8
+	mov r14=r0		// clear r14
+(pKern)	br.cond.dpnt skip_rbs_switch
+	/*
+	 * Restore user backing store.
+	 *
+	 * NOTE: alloc, loadrs, and cover can't be predicated.
+	 */
+	cover				// add current frame into dirty partition
+	shr.u r18=r19,16	// get byte size of existing "dirty" partition
+	;;
+	mov r19=ar.bsp			// get new backing store pointer
+	sub r16=r16,r18			// krbs = old bsp - size of dirty partition
+	cmp.ne p9,p0=r0,r0		// clear p9 to skip restore of cr.ifs
+	;;
+	sub r19=r19,r16			// calculate total byte size of dirty partition
+	add r18=64,r18			// don't force in0-in7 into memory...
+	;;
+	shl r19=r19,16			// shift size of dirty partition into loadrs position
+	br.few dont_preserve_current_frame
+	;;
+END(ia64_leave_syscall)
+
+GLOBAL_ENTRY(ia64_ret_from_execve_syscall)
+	PT_REGS_UNWIND_INFO(0)
+	cmp.ge p6,p7=r8,r0			// syscall executed successfully?
+	adds r2=PT(R8)+16,sp			// r2 = &pt_regs.r8
+	adds r3=PT(R10)+16,sp			// r3 = &pt_regs.r10
+	;;
+	.mem.offset 0,0
+(p6)	st8.spill [r2]=r8	// store return value in slot for r8 and set unat bit
+	.mem.offset 8,0
+(p6)	st8.spill [r3]=r0	// clear error indication in slot for r10 and set unat bit
+(p7)	br.cond.spnt handle_syscall_error	// handle potential syscall failure
+END(ia64_ret_from_execve_syscall)
+	// fall through
 GLOBAL_ENTRY(ia64_leave_kernel)
 	PT_REGS_UNWIND_INFO(0)
 	lfetch.fault [sp]
 	movl r14=.restart
 	;;
 	mov.ret.sptk rp=r14,.restart
+	cmp.eq p0,pLvSys=r0,r0			// pLvSys=0: leave from kernel
 .restart:
 	// need_resched and signals atomic test
 (pUser)	rsm psr.i
@@ -564,18 +738,35 @@
 (p7)	br.call.spnt.many b7=schedule
 #endif
 (p8)	br.call.spnt.many rp=handle_signal_delivery	// check & deliver pending signals (once)
+
+	adds r20=PT(CR_IPSR)+16,r12
+	adds r21=PT(PR)+16,r12
 	;;
-.ret9:	adds r2=PT(R8)+16,r12
-	adds r3=PT(R9)+16,r12
+	lfetch.fault.excl [r20]
+	lfetch.fault.excl [r21]
+	adds r2=PT(B6)+16,r12
+	adds r3=PT(R16)+16,r12
+	mov r29=PT(R24)-PT(B6)
+	mov r30=PT(B7)-PT(R24)
 	;;
 	// start restoring the state saved on the kernel stack (struct pt_regs):
-	ld8.fill r8=[r2],16
-	ld8.fill r9=[r3],16
+	ld8 r28=[r2],r29	// b6
+	ld8.fill r16=[r3],128
+	mov r31=PT(AR_CSD)-PT(AR_CCV)
+	;;
+	ld8.fill r24=[r2],r30
+	ld8 r15=[r3],r31
+	;; 
+	ld8 r29=[r2],16		// b7
+	ld8 r30=[r3],16		// ar.csd
 	;;
-	ld8.fill r10=[r2],16
-	ld8.fill r11=[r3],16
+	ld8 r31=[r2],16		// ar.ssd
+	ld8.fill r8=[r3],16
 	;;
-	ld8.fill r16=[r2],16
+	ld8.fill r9=[r2],16
+	ld8.fill r10=[r3],PT(R17)-PT(R10)
+	;;
+	ld8.fill r11=[r2],PT(R18)-PT(R11)
 	ld8.fill r17=[r3],16
 	;;
 	ld8.fill r18=[r2],16
@@ -583,82 +774,78 @@
 	;;
 	ld8.fill r20=[r2],16
 	ld8.fill r21=[r3],16
-	;;
-	ld8.fill r22=[r2],16
-	ld8.fill r23=[r3],16
-	;;
-	ld8.fill r24=[r2],16
-	ld8.fill r25=[r3],16
-	;;
-	ld8.fill r26=[r2],16
-	ld8.fill r27=[r3],16
-	;;
-	ld8.fill r28=[r2],16
-	ld8.fill r29=[r3],16
-	;;
-	ld8.fill r30=[r2],16
-	ld8.fill r31=[r3],16
+	mov ar.csd=r30
+	mov ar.ssd=r31
 	;;
 	rsm psr.i | psr.ic	// initiate turning off of interrupt and interruption collection
 	invala			// invalidate ALAT
 	;;
-	ld8 r1=[r2],16		// ar.ccv
-	ld8 r13=[r3],16		// ar.fpsr
+	ld8.fill r22=[r2],24
+	ld8.fill r23=[r3],24
+	mov b6=r28
+	;;
+	ld8.fill r25=[r2],16
+	ld8.fill r26=[r3],16
+	mov b7=r29
 	;;
-	ld8 r14=[r2],16		// b0
-	ld8 r15=[r3],16+8	// b7
+	ld8.fill r27=[r2],16
+	ld8.fill r28=[r3],16
 	;;
-	ldf.fill f6=[r2],32
-	ldf.fill f7=[r3],32
+	ld8.fill r29=[r2],16
+	ld8.fill r30=[r3],24
 	;;
-	ldf.fill f8=[r2],32
-	ldf.fill f9=[r3],32
+	ld8.fill r31=[r2],32
+	ldf.fill f6=[r3],32
 	;;
-	mov ar.ccv=r1
-	mov ar.fpsr=r13
-	mov b0=r14
+	ldf.fill f7=[r2],32
+	ldf.fill f8=[r3],32
 	;;
 	srlz.i			// ensure interruption collection is off
-	mov b7=r15
+	mov ar.ccv=r15
 	;;
+	ldf.fill f9=[r2],32
+	ldf.fill f10=[r3],32
 	bsw.0			// switch back to bank 0
 	;;
-	adds r16=16,r12
-	adds r17=24,r12
+	ldf.fill f11=[r2]
+	adds r16=PT(CR_IPSR)+16,r12
+	adds r17=PT(CR_IIP)+16,r12
 	;;
-	ld8 rCRIPSR=[r16],16	// load cr.ipsr
-	ld8 rCRIIP=[r17],16	// load cr.iip
+	ld8 r29=[r16],16	// load cr.ipsr
+	ld8 r28=[r17],16	// load cr.iip
 	;;
-	ld8 rCRIFS=[r16],16	// load cr.ifs
-	ld8 rARUNAT=[r17],16	// load ar.unat
+	ld8 r30=[r16],16	// load cr.ifs
+	ld8 r25=[r17],16	// load ar.unat
 	cmp.eq p9,p0=r0,r0	// set p9 to indicate that we should restore cr.ifs
 	;;
-	ld8 rARPFS=[r16],16	// load ar.pfs
-	ld8 rARRSC=[r17],16	// load ar.rsc
+	ld8 r26=[r16],16	// load ar.pfs
+	ld8 r27=[r17],16	// load ar.rsc
 	;;
-	ld8 rARRNAT=[r16],16	// load ar.rnat (may be garbage)
-	ld8 rARBSPSTORE=[r17],16 // load ar.bspstore (may be garbage)
+	ld8 r24=[r16],16	// load ar.rnat (may be garbage)
+	ld8 r23=[r17],16 // load ar.bspstore (may be garbage)
 	;;
-	ld8 rARPR=[r16],16	// load predicates
-	ld8 rB6=[r17],16	// load b6
+	ld8 r31=[r16],16	// load predicates
+	ld8 r21=[r17],16	// load b0
 	;;
 	ld8 r19=[r16],16	// load ar.rsc value for "loadrs"
 	ld8.fill r1=[r17],16	// load r1
 	;;
-	ld8.fill r2=[r16],16
-	ld8.fill r3=[r17],16
-	;;
 	ld8.fill r12=[r16],16
 	ld8.fill r13=[r17],16
 	;;
+	ld8 r20=[r16],16
+	ld8.fill r15=[r17],16
+	;;
 	ld8.fill r14=[r16]
-	ld8.fill r15=[r17]
-	shr.u r18=r19,16	// get byte size of existing "dirty" partition
+	ld8.fill r2=[r17],16
+	adds r18=16,r16
 	;;
 	mov r16=ar.bsp		// get existing backing store pointer
 	movl r17=PERCPU_ADDR+IA64_CPU_PHYS_STACKED_SIZE_P8_OFFSET
 	;;
+	ld8.fill r3=[r18]
 	ld4 r17=[r17]		// r17 = cpu_data->phys_stacked_size_p8
+	shr.u r18=r19,16	// get byte size of existing "dirty" partition
 (pKern)	br.cond.dpnt skip_rbs_switch
 	/*
 	 * Restore user backing store.
@@ -718,7 +905,7 @@
 }{ .mib
 	mov loc3=0
 	mov loc4=0
-(pRecurse) br.call.sptk.many b6=rse_clear_invalid
+(pRecurse) br.call.sptk.many b0=rse_clear_invalid
 
 }{ .mfi	// cycle 2
 	mov loc5=0
@@ -727,7 +914,7 @@
 }{ .mib
 	mov loc6=0
 	mov loc7=0
-(pReturn) br.ret.sptk.many b6
+(pReturn) br.ret.sptk.many b0
 }
 #else /* !CONFIG_ITANIUM */
 	alloc loc0=ar.pfs,2,Nregs-2,2,0
@@ -742,14 +929,14 @@
 	mov loc5=0
 	mov loc6=0
 	mov loc7=0
-(pRecurse) br.call.sptk.many b6=rse_clear_invalid
+(pRecurse) br.call.sptk.many b0=rse_clear_invalid
 	;;
 	mov loc8=0
 	mov loc9=0
 	cmp.ne pReturn,p0=r0,in1	// if recursion count != 0, we need to do a br.ret
 	mov loc10=0
 	mov loc11=0
-(pReturn) br.ret.sptk.many b6
+(pReturn) br.ret.sptk.many b0
 #endif /* !CONFIG_ITANIUM */
 #	undef pRecurse
 #	undef pReturn
@@ -759,17 +946,22 @@
 	loadrs
 	;;
 skip_rbs_switch:
-	mov b6=rB6
-	mov ar.pfs=rARPFS
-(pUser)	mov ar.bspstore=rARBSPSTORE
-(p9)	mov cr.ifs=rCRIFS
-	mov cr.ipsr=rCRIPSR
-	mov cr.iip=rCRIIP
-	;;
-(pUser)	mov ar.rnat=rARRNAT	// must happen with RSE in lazy mode
-	mov ar.rsc=rARRSC
-	mov ar.unat=rARUNAT
-	mov pr=rARPR,-1
+(pLvSys)mov r19=r0		// clear r19 for leave_syscall, no-op otherwise
+	mov b0=r21
+	mov ar.pfs=r26
+(pUser)	mov ar.bspstore=r23
+(p9)	mov cr.ifs=r30
+(pLvSys)mov r16=r0		// clear r16 for leave_syscall, no-op otherwise
+	mov cr.ipsr=r29
+	mov ar.fpsr=r20
+(pLvSys)mov r17=r0		// clear r17 for leave_syscall, no-op otherwise
+	mov cr.iip=r28
+	;;
+(pUser)	mov ar.rnat=r24		// must happen with RSE in lazy mode
+(pLvSys)mov r18=r0		// clear r18 for leave_syscall, no-op otherwise
+	mov ar.rsc=r27
+	mov ar.unat=r25
+	mov pr=r31,-1
 	rfi
 END(ia64_leave_kernel)
 
@@ -795,7 +987,7 @@
 	;;
 .mem.offset 0,0; st8.spill [r2]=r9	// store errno in pt_regs.r8 and set unat bit
 .mem.offset 8,0; st8.spill [r3]=r10	// store error indication in pt_regs.r10 and set unat bit
-	br.cond.sptk ia64_leave_kernel
+	br.cond.sptk ia64_leave_syscall
 END(handle_syscall_error)
 
 	/*
@@ -906,6 +1098,22 @@
 	.body
 	cmp.eq pNonSys,pSys=r0,r0		// sigreturn isn't a normal syscall...
 	;;
+	/* After the signal handler, the live registers f6-f11 are restored to the values of
+	 * the previously executing context for synchronous signals (from exceptions), or
+	 * cleared to 0 for asynchronous signals (from syscalls). These live registers are
+	 * then put into pt_regs for the return to user space.
+	 */
+	adds r16=PT(F6)+32,sp
+	adds r17=PT(F7)+32,sp
+	;;
+ 	stf.spill [r16]=f6,32	
+ 	stf.spill [r17]=f7,32	
+	;;
+ 	stf.spill [r16]=f8,32	
+ 	stf.spill [r17]=f9,32	
+	;;
+ 	stf.spill [r16]=f10	
+ 	stf.spill [r17]=f11	
 	adds out0=16,sp				// out0 = &sigscratch
 	br.call.sptk.many rp=ia64_rt_sigreturn
 .ret19:	.restore sp 0
