patch-2.3.23 linux/arch/sh/lib/memmove.S

Next file: linux/arch/sh/lib/memset.S
Previous file: linux/arch/sh/lib/memcpy.S
Back to the patch index
Back to the overall index

diff -u --recursive --new-file v2.3.22/linux/arch/sh/lib/memmove.S linux/arch/sh/lib/memmove.S
@@ -1,422 +1,254 @@
+/* $Id: memmove.S,v 1.2 1999/09/21 12:55:49 gniibe Exp $
+ *
+ * "memmove" implementation of SuperH
+ *
+ * Copyright (C) 1999  Niibe Yutaka
+ *
+ */
+
+/*
+ * void *memmove(void *dst, const void *src, size_t n);
+ * The memory areas may overlap.
+ */
+
 #include <linux/linkage.h>
 ENTRY(memmove)
-	mov.l	r8,@-r15
-	mov.l	r9,@-r15
-	mov.l	r14,@-r15
-	sts.l	pr,@-r15
-	add	#-28,r15
-	mov	r15,r14
-	mov.l	r4,@r14
-	mov.l	r5,@(4,r14)
-	mov.l	r6,@(8,r14)
-	mov.l	@r14,r1
-	mov.l	r1,@(12,r14)
-	mov.l	@(4,r14),r1
-	mov.l	r1,@(16,r14)
-	mov.l	@(12,r14),r1
-	mov.l	@(16,r14),r2
-	sub	r2,r1
-	mov.l	@(8,r14),r2
-	cmp/hs	r2,r1
-	bt	.L54
-	bra	.L2
-	nop
-.L54:
-	mov.l	@(8,r14),r1
-	mov	#15,r2
-	cmp/gt	r2,r1
-	bt	.LF100
-	bra	.L52
-	nop
-.LF100:
-	mov.l	@(12,r14),r2
-	neg	r2,r1
-	mov	#3,r2
-	and	r1,r2
-	mov.l	@(8,r14),r1
-	mov	r1,r9
-	sub	r2,r9
-	mov	r9,r2
-	mov.l	r2,@(8,r14)
-.L4:
-	mov.l	@(12,r14),r2
-	neg	r2,r1
-	mov	#3,r2
-	and	r1,r2
-	mov.l	r2,@(20,r14)
-.L7:
-	mov.l	@(20,r14),r1
-	cmp/pl	r1
-	bt	.L9
-	bra	.L6
-	nop
-	.align 2
-.L9:
-	mov	r14,r2
-	mov	r14,r1
-	add	#24,r1
-	mov.l	@(16,r14),r2
-	mov.b	@r2,r3
-	mov.b	r3,@r1
-	mov.l	@(16,r14),r1
-	mov	r1,r2
-	add	#1,r2
-	mov.l	r2,@(16,r14)
-	mov.l	@(20,r14),r1
-	mov	r1,r2
-	add	#-1,r2
-	mov.l	r2,@(20,r14)
-	mov.l	@(12,r14),r1
-	mov	r14,r2
-	mov	r14,r3
-	add	#24,r3
-	mov.b	@r3,r2
-	mov.b	r2,@r1
-	mov.l	@(12,r14),r1
-	mov	r1,r2
-	add	#1,r2
-	mov.l	r2,@(12,r14)
-	bra	.L7
-	nop
-	.align 2
-.L8:
-.L6:
-	bra	.L5
-	nop
-	.align 2
-.L10:
-	bra	.L4
-	nop
-	.align 2
-.L5:
-	nop
-.L11:
-	mov.l	@(16,r14),r1
-	mov	#3,r2
-	and	r1,r2
-	tst	r2,r2
-	bf	.L14
-	mov	r15,r2
-	mov.l	@(12,r14),r1
-	mov.l	@(16,r14),r2
-	mov.l	@(8,r14),r7
-	mov	r7,r3
-	shlr2	r3
-	mov	r1,r4
-	mov	r2,r5
-	mov	r3,r6
-	mov.l	.L46,r8
-	jsr	@r8
-	nop
-	bra	.L15
-	nop
-	.align 2
-.L14:
-	mov	r15,r2
-	mov.l	@(12,r14),r1
-	mov.l	@(16,r14),r2
-	mov.l	@(8,r14),r7
-	mov	r7,r3
-	shlr2	r3
-	mov	r1,r4
-	mov	r2,r5
-	mov	r3,r6
-	mov.l	.L47,r8
-	jsr	@r8
-	nop
-.L15:
-	mov.l	@(8,r14),r1
-	mov	#-4,r2
-	and	r2,r1
-	mov.l	@(16,r14),r2
-	add	r2,r1
-	mov.l	r1,@(16,r14)
-	mov.l	@(8,r14),r1
-	mov	#-4,r2
-	and	r2,r1
-	mov.l	@(12,r14),r2
-	add	r2,r1
-	mov.l	r1,@(12,r14)
-	mov.l	@(8,r14),r1
-	mov	#3,r2
-	and	r1,r2
-	mov.l	r2,@(8,r14)
-.L13:
-.L52:
-	bra	.L3
-	nop
-	.align 2
-.L16:
-	bra	.L11
-	nop
-	.align 2
-.L12:
-.L3:
-	nop
-.L17:
-	mov.l	@(8,r14),r1
-	mov.l	r1,@(20,r14)
-.L20:
-	mov.l	@(20,r14),r1
-	cmp/pl	r1
-	bt	.L22
-	bra	.L19
-	nop
-	.align 2
-.L22:
-	mov	r14,r2
-	mov	r14,r1
-	add	#24,r1
-	mov.l	@(16,r14),r2
-	mov.b	@r2,r3
-	mov.b	r3,@r1
-	mov.l	@(16,r14),r1
-	mov	r1,r2
-	add	#1,r2
-	mov.l	r2,@(16,r14)
-	mov.l	@(20,r14),r1
-	mov	r1,r2
-	add	#-1,r2
-	mov.l	r2,@(20,r14)
-	mov.l	@(12,r14),r1
-	mov	r14,r2
-	mov	r14,r3
-	add	#24,r3
-	mov.b	@r3,r2
-	mov.b	r2,@r1
-	mov.l	@(12,r14),r1
-	mov	r1,r2
-	add	#1,r2
-	mov.l	r2,@(12,r14)
-	bra	.L20
-	nop
-	.align 2
-.L21:
-.L19:
-	bra	.L18
-	nop
-	.align 2
-.L23:
-	bra	.L17
-	nop
-	.align 2
-.L18:
-	bra	.L24
-	nop
-	.align 2
-.L2:
-	mov.l	@(16,r14),r1
-	mov.l	@(8,r14),r2
-	add	r2,r1
-	mov.l	r1,@(16,r14)
-	mov.l	@(12,r14),r1
-	mov.l	@(8,r14),r2
-	add	r2,r1
-	mov.l	r1,@(12,r14)
-	mov.l	@(8,r14),r1
-	mov	#15,r2
-	cmp/gt	r2,r1
-	bt	.LF101
-	bra	.L53
-	nop
-.LF101:
-	mov.l	@(12,r14),r1
-	mov	#3,r2
-	and	r1,r2
-	mov.l	@(8,r14),r1
-	mov	r1,r9
-	sub	r2,r9
-	mov	r9,r2
-	mov.l	r2,@(8,r14)
-.L26:
-	mov.l	@(12,r14),r1
-	mov	#3,r2
-	and	r1,r2
-	mov.l	r2,@(20,r14)
-.L29:
-	mov.l	@(20,r14),r1
-	cmp/pl	r1
-	bt	.L31
-	bra	.L28
-	nop
-	.align 2
-.L31:
-	mov.l	@(16,r14),r1
-	mov	r1,r2
-	add	#-1,r2
-	mov.l	r2,@(16,r14)
-	mov	r14,r2
-	mov	r14,r1
-	add	#24,r1
-	mov.l	@(16,r14),r2
-	mov.b	@r2,r3
-	mov.b	r3,@r1
-	mov.l	@(12,r14),r1
-	mov	r1,r2
-	add	#-1,r2
-	mov.l	r2,@(12,r14)
-	mov.l	@(20,r14),r1
-	mov	r1,r2
-	add	#-1,r2
-	mov.l	r2,@(20,r14)
-	mov.l	@(12,r14),r1
-	mov	r14,r2
-	mov	r14,r3
-	add	#24,r3
-	mov.b	@r3,r2
-	mov.b	r2,@r1
-	bra	.L29
-	nop
-	.align 2
-.L30:
-.L28:
-	bra	.L27
-	nop
-	.align 2
-.L32:
-	bra	.L26
-	nop
-	.align 2
-.L27:
-	nop
-.L33:
-	mov.l	@(16,r14),r1
+	! if dst > src (unsigned), jump to memcpy (noted by the author to copy in decreasing order)
+	cmp/hi	r5,r4		! T = (r4 > r5) unsigned, r4 = dst, r5 = src
+	bf	1f		! dst <= src: copy forward below
+	mov.l	2f,r0		! r0 = address of memcpy, from the literal at 2:
+	jmp	@r0		! plain jump, r4-r6 are passed on unmodified
+	 nop			! (delay slot)
+	.balign 4		! keep the literal read by mov.l long-word aligned
+2:	.long	SYMBOL_NAME(memcpy)
+1:
+	sub	r5,r4		! r4 = dst - src, the offset from source cursor r0 to the destination
+	tst	r6,r6		! T = (n == 0)
+	bt/s	9f		! if n=0, do nothing
+	 mov	r5,r0		! (delay slot, always executed) r0 = src
+	add	r6,r5		! r5 = src + n, the end of the source
+	mov	#12,r1
+	cmp/gt	r6,r1		! T = (12 > n), signed compare
+	bt/s	8f		! if it's too small, copy a byte at once
+	 add	#-1,r4		! (delay slot) the byte loop at 8: wants r4 = dst - src - 1
+	add	#1,r4		! branch not taken: restore r4 = dst - src
+	!
+	!                [ ...  ] DST             [ ...  ] SRC
+	!	         [ ...  ]                 [ ...  ]
+	!	           :                        :
+	!      r0+r4-->  [ ...  ]       r0    --> [ ...  ]
+	!	           :                        :
+	!	         [ ...  ]                 [ ...  ]
+	!			        r5    -->
+	!
+	mov	r4,r1		! r1 = dst - src; the code that follows dispatches on its low two bits
 	mov	#3,r2
-	and	r1,r2
-	tst	r2,r2
-	bf	.L36
-	mov	r15,r2
-	mov.l	@(12,r14),r1
-	mov.l	@(16,r14),r2
-	mov.l	@(8,r14),r7
-	mov	r7,r3
-	shlr2	r3
-	mov	r1,r4
-	mov	r2,r5
-	mov	r3,r6
-	mov.l	.L48,r8
-	jsr	@r8
-	nop
-	bra	.L37
-	nop
-	.align 2
-.L36:
-	mov	r15,r2
-	mov.l	@(12,r14),r1
-	mov.l	@(16,r14),r2
-	mov.l	@(8,r14),r7
-	mov	r7,r3
-	shlr2	r3
-	mov	r1,r4
-	mov	r2,r5
-	mov	r3,r6
-	mov.l	.L49,r8
-	jsr	@r8
-	nop
-.L37:
-	mov.l	@(8,r14),r1
-	mov	#-4,r2
 	and	r2,r1
-	mov.l	@(16,r14),r2
-	mov	r2,r9
-	sub	r1,r9
-	mov	r9,r1
-	mov.l	r1,@(16,r14)
-	mov.l	@(8,r14),r1
-	mov	#-4,r2
-	and	r2,r1
-	mov.l	@(12,r14),r2
-	mov	r2,r9
-	sub	r1,r9
-	mov	r9,r1
-	mov.l	r1,@(12,r14)
-	mov.l	@(8,r14),r1
-	mov	#3,r2
-	and	r1,r2
-	mov.l	r2,@(8,r14)
-.L35:
-.L53:
-	bra	.L25
-	nop
-	.align 2
-.L38:
-	bra	.L33
-	nop
-	.align 2
-.L34:
-.L25:
-	nop
-.L39:
-	mov.l	@(8,r14),r1
-	mov.l	r1,@(20,r14)
-.L42:
-	mov.l	@(20,r14),r1
-	cmp/pl	r1
-	bt	.L44
-	bra	.L41
-	nop
-	.align 2
-.L44:
-	mov.l	@(16,r14),r1
-	mov	r1,r2
-	add	#-1,r2
-	mov.l	r2,@(16,r14)
-	mov	r14,r2
-	mov	r14,r1
-	add	#24,r1
-	mov.l	@(16,r14),r2
-	mov.b	@r2,r3
-	mov.b	r3,@r1
-	mov.l	@(12,r14),r1
-	mov	r1,r2
-	add	#-1,r2
-	mov.l	r2,@(12,r14)
-	mov.l	@(20,r14),r1
-	mov	r1,r2
-	add	#-1,r2
-	mov.l	r2,@(20,r14)
-	mov.l	@(12,r14),r1
-	mov	r14,r2
-	mov	r14,r3
-	add	#24,r3
-	mov.b	@r3,r2
-	mov.b	r2,@r1
-	bra	.L42
-	nop
-	.align 2
-.L43:
-.L41:
-	bra	.L24
-	nop
-	.align 2
-.L45:
-	bra	.L39
-	nop
-	.align 2
-.L40:
-.L24:
-	mov.l	@r14,r1
-	mov	r1,r0
-	bra	.L1
-	nop
-	.align 2
-.L1:
-	add	#28,r14
-	mov	r14,r15
-	lds.l	@r15+,pr
-	mov.l	@r15+,r14
-	mov.l	@r15+,r9
-	mov.l	@r15+,r8
-	rts	
-	 nop
-.L50:
-	.align 2
-.L46:
-	.long	__wordcopy_fwd_aligned
-.L47:
-	.long	__wordcopy_fwd_dest_aligned
-.L48:
-	.long	__wordcopy_bwd_aligned
-.L49:
-	.long	__wordcopy_bwd_dest_aligned
-.Lfe1:
+	shll2	r1		! r1 = ((dst - src) & 3) * 4, a byte offset into jmptable
+	mov	r0,r3		! Save the value on R0 to R3
+	mova	jmptable,r0	! r0 = address of jmptable (mova always writes r0)
+	add	r1,r0		! r0 = address of the selected table entry
+	mov.l	@r0,r1		! r1 = address of the selected case
+	jmp	@r1
+	 mov	r3,r0		! and back to R0 (delay slot, done before the case runs)
+	.balign	4		! table is read with mova/mov.l, keep it long-word aligned
+jmptable:
+	.long	case0		! (dst - src) & 3 == 0
+	.long	case1		! (dst - src) & 3 == 1
+	.long	case2		! (dst - src) & 3 == 2
+	.long	case3		! (dst - src) & 3 == 3
+
+	! copy a byte at once; entered with r4 = dst - src - 1, body runs before the test
+8:	mov.b	@r0+,r1		! r1 = next source byte, r0 += 1
+	cmp/hs	r5,r0		! T = (r0 >= r5), unsigned
+	bf/s	8b			! while (r0<r5)
+	 mov.b	r1,@(r0,r4)	! (delay slot) store at r0 + r4; the -1 in r4 cancels the increment
+	add	#1,r4		! r4 = dst - src again
+9:				! common exit, reached with r4 = dst - src
+	add	r4,r0		! r0 = source cursor + (dst - src)
+	rts
+	 sub	r6,r0		! (delay slot) r0 -= n, giving dst as the result
+
+case_none:			! not listed in jmptable and not branched to within this hunk
+	bra	8b		! enter the byte loop
+	 add	#-1,r4		! (delay slot) r4 = dst - src - 1, as 8: expects
+
+case0:
+	! dst - src == 0 (mod 4): both cursors reach long-word alignment together
+	!	GHIJ KLMN OPQR -->  GHIJ KLMN OPQR
+	!
+	! First, align to long word boundary
+	mov	r0,r3
+	and	r2,r3		! r3 = r0 & 3 (r2 holds 3, set by the dispatch code)
+	tst	r3,r3		! T = (source cursor already aligned)
+	bt/s	2f
+	 add	#-1,r4		! (delay slot) r4 = dst - src - 1
+	mov	#4,r2
+	sub	r3,r2		! r2 = 4 - (r0 & 3) bytes up to the boundary
+1:	dt	r2		! r2 -= 1, T = (r2 == 0)
+	mov.b	@r0+,r1
+	bf/s	1b
+	 mov.b	r1,@(r0,r4)	! (delay slot) r0 is already incremented, r4 compensates
+	!
+2:	! Second, copy a long word at once
+	add	#-3,r4		! r4 = dst - src - 4, compensating the 4-byte post-increment
+	add	#-3,r5		! r5 = end - 3
+3:	mov.l	@r0+,r1
+	cmp/hs	r5,r0		! T = (r0 >= end - 3), unsigned
+	bf/s	3b		! loop while at least 4 bytes remain
+	 mov.l	r1,@(r0,r4)	! (delay slot)
+	add	#3,r5		! r5 = end of the source again
+	!
+	! Third, copy a byte at once, if necessary
+	cmp/eq	r5,r0		! T = (nothing left)
+	bt/s	9b
+	 add	#4,r4		! (delay slot) r4 = dst - src
+	bra	8b		! 1 to 3 bytes left for the byte loop
+	 add	#-1,r4		! (delay slot) r4 = dst - src - 1, as 8: expects
+
+case3:
+	! dst - src == 3 (mod 4)
+	!	GHIJ KLMN OPQR -->  ...G HIJK LMNO PQR.
+	!
+	! First, align to long word boundary
+	mov	r0,r3
+	and	r2,r3		! r3 = r0 & 3 (r2 holds 3, set by the dispatch code)
+	tst	r3,r3		! T = (source cursor already aligned)
+	bt/s	2f
+	 add	#-1,r4		! (delay slot) r4 = dst - src - 1
+	mov	#4,r2
+	sub	r3,r2		! r2 = 4 - (r0 & 3) bytes up to the boundary
+1:	dt	r2		! r2 -= 1, T = (r2 == 0)
+	mov.b	@r0+,r1
+	bf/s	1b
+	 mov.b	r1,@(r0,r4)	! (delay slot) r0 is already incremented, r4 compensates
+	!
+2:	! Second, read a long word and write a long word at once
+	add	#-2,r4		! r4 = dst - src - 3
+	mov.l	@(r0,r4),r1	! r1 = long word now at r0 + (dst - src) - 3 in the destination
+	add	#-7,r5		! r5 = end - 7
+	add	#-4,r4		! r4 = dst - src - 7; after r0 += 4 the store hits that same long word
+	!
+#ifdef __LITTLE_ENDIAN__
+	shll8	r1		! pre-shift so that shlr8 at 3: keeps the three low bytes
+3:	mov	r1,r3		! JIHG
+	shlr8	r3		! xJIH
+	mov.l	@r0+,r1		! NMLK
+	mov	r1,r2
+	shll16	r2
+	shll8	r2		! Kxxx
+	or	r2,r3		! KJIH
+	cmp/hs	r5,r0		! T = (r0 >= end - 7), unsigned
+	bf/s	3b
+	 mov.l	r3,@(r0,r4)	! (delay slot) store the merged long word
+#else
+	shlr8	r1		! pre-shift so that shll8 at 3: keeps the three high bytes
+3:	mov	r1,r3		! GHIJ
+	shll8	r3		! HIJx
+	mov.l	@r0+,r1		! KLMN
+	mov	r1,r2
+	shlr16	r2
+	shlr8	r2		! xxxK
+	or	r2,r3		! HIJK
+	cmp/hs	r5,r0		! T = (r0 >= end - 7), unsigned
+	bf/s	3b
+	 mov.l	r3,@(r0,r4)	! (delay slot) store the merged long word
+#endif
+	add	#7,r5		! r5 = end of the source again
+	! NOTE(review): the loop seems to exit with 4 to 7 bytes left (n >= 12 here), so bt/s below looks never taken - confirm
+	! Third, copy a byte at once, if necessary
+	cmp/eq	r5,r0
+	bt/s	9b
+	 add	#7,r4		! (delay slot) r4 = dst - src
+	add	#-3,r0		! back up: only the first byte of the last long word read was stored
+	bra	8b
+	 add	#-1,r4		! (delay slot) r4 = dst - src - 1, as 8: expects
+
+case2:
+	! dst - src == 2 (mod 4): 16-bit accesses are aligned on both sides once r0 is even
+	!	GHIJ KLMN OPQR -->  ..GH IJKL MNOP QR..
+	!
+	! First, align to word boundary
+	tst	#1,r0		! T = (r0 is even)
+	bt/s	2f
+	 add	#-1,r4		! (delay slot) r4 = dst - src - 1
+	mov.b	@r0+,r1		! odd source address: move one byte
+	mov.b	r1,@(r0,r4)
+	!
+2:	! Second, read a word and write a word at once
+	add	#-1,r4		! r4 = dst - src - 2, compensating the 2-byte post-increment
+	add	#-1,r5		! r5 = end - 1
+	!
+3:	mov.w	@r0+,r1
+	cmp/hs	r5,r0		! T = (r0 >= end - 1), unsigned
+	bf/s	3b		! loop while at least 2 bytes remain
+	 mov.w	r1,@(r0,r4)	! (delay slot)
+	add	#1,r5		! r5 = end of the source again
+	!
+	! Third, copy a byte at once, if necessary
+	cmp/eq	r5,r0		! T = (nothing left)
+	bt/s	9b
+	 add	#2,r4		! (delay slot) r4 = dst - src
+	mov.b	@r0,r1		! one byte left; no post-increment, so r4 needs no bias
+	mov.b	r1,@(r0,r4)
+	bra	9b
+	 add	#1,r0		! (delay slot) r0 = end of the source, as 9: expects
+
+case1:
+	! dst - src == 1 (mod 4)
+	!	GHIJ KLMN OPQR -->  .GHI JKLM NOPQ R...
+	!
+	! First, align to long word boundary
+	mov	r0,r3
+	and	r2,r3		! r3 = r0 & 3 (r2 holds 3, set by the dispatch code)
+	tst	r3,r3		! T = (source cursor already aligned)
+	bt/s	2f
+	 add	#-1,r4		! (delay slot) r4 = dst - src - 1
+	mov	#4,r2
+	sub	r3,r2		! r2 = 4 - (r0 & 3) bytes up to the boundary
+1:	dt	r2		! r2 -= 1, T = (r2 == 0)
+	mov.b	@r0+,r1
+	bf/s	1b
+	 mov.b	r1,@(r0,r4)	! (delay slot) r0 is already incremented, r4 compensates
+	!
+2:	! Second, read a long word and write a long word at once
+	mov.l	@(r0,r4),r1	! r1 = long word now at r0 + (dst - src) - 1 in the destination
+	add	#-7,r5		! r5 = end - 7
+	add	#-4,r4		! r4 = dst - src - 5; after r0 += 4 the store hits that same long word
+	!
+#ifdef __LITTLE_ENDIAN__
+	shll16	r1
+	shll8	r1		! pre-shift by 24 so that the shifts at 3: keep only the lowest byte
+3:	mov	r1,r3		! JIHG
+	shlr16	r3
+	shlr8	r3		! xxxJ
+	mov.l	@r0+,r1		! NMLK
+	mov	r1,r2
+	shll8	r2		! MLKx
+	or	r2,r3		! MLKJ
+	cmp/hs	r5,r0		! T = (r0 >= end - 7), unsigned
+	bf/s	3b
+	 mov.l	r3,@(r0,r4)	! (delay slot) store the merged long word
+#else
+	shlr16	r1
+	shlr8	r1		! pre-shift by 24 so that the shifts at 3: keep only the highest byte
+3:	mov	r1,r3		! GHIJ
+	shll16	r3
+	shll8	r3		! Jxxx
+	mov.l	@r0+,r1		! KLMN
+	mov	r1,r2
+	shlr8	r2		! xKLM
+	or	r2,r3		! JKLM
+	cmp/hs	r5,r0		! T = (r0 >= end - 7), unsigned
+	bf/s	3b		! while(r0<r5)
+	 mov.l	r3,@(r0,r4)	! (delay slot) store the merged long word
+#endif
+	add	#7,r5		! r5 = end of the source again
+	! NOTE(review): the loop seems to exit with 4 to 7 bytes left (n >= 12 here), so bt/s below looks never taken - confirm
+	! Third, copy a byte at once, if necessary
+	cmp/eq	r5,r0
+	bt/s	9b
+	 add	#5,r4		! (delay slot) r4 = dst - src
+	add	#-3,r0		! back up 3: the last byte read is unstored, the two before it get copied again
+	bra	8b
+	 add	#-1,r4		! (delay slot) r4 = dst - src - 1, as 8: expects

FUNET's LINUX-ADM group, linux-adm@nic.funet.fi
TCL-scripts by Sam Shen (who was at: slshen@lbl.gov)