patch-2.3.23 linux/arch/sh/lib/memmove.S
Next file: linux/arch/sh/lib/memset.S
Previous file: linux/arch/sh/lib/memcpy.S
Back to the patch index
Back to the overall index
- Lines: 673
- Date:
Mon Oct 18 11:16:13 1999
- Orig file:
v2.3.22/linux/arch/sh/lib/memmove.S
- Orig date:
Tue Aug 31 17:29:13 1999
diff -u --recursive --new-file v2.3.22/linux/arch/sh/lib/memmove.S linux/arch/sh/lib/memmove.S
@@ -1,422 +1,254 @@
+/* $Id: memmove.S,v 1.2 1999/09/21 12:55:49 gniibe Exp $
+ *
+ * "memmove" implementation of SuperH
+ *
+ * Copyright (C) 1999 Niibe Yutaka
+ *
+ */
+
+/*
+ * void *memmove(void *dst, const void *src, size_t n);
+ * The memory areas may overlap.
+ */
+
#include <linux/linkage.h>
ENTRY(memmove)
- mov.l r8,@-r15
- mov.l r9,@-r15
- mov.l r14,@-r15
- sts.l pr,@-r15
- add #-28,r15
- mov r15,r14
- mov.l r4,@r14
- mov.l r5,@(4,r14)
- mov.l r6,@(8,r14)
- mov.l @r14,r1
- mov.l r1,@(12,r14)
- mov.l @(4,r14),r1
- mov.l r1,@(16,r14)
- mov.l @(12,r14),r1
- mov.l @(16,r14),r2
- sub r2,r1
- mov.l @(8,r14),r2
- cmp/hs r2,r1
- bt .L54
- bra .L2
- nop
-.L54:
- mov.l @(8,r14),r1
- mov #15,r2
- cmp/gt r2,r1
- bt .LF100
- bra .L52
- nop
-.LF100:
- mov.l @(12,r14),r2
- neg r2,r1
- mov #3,r2
- and r1,r2
- mov.l @(8,r14),r1
- mov r1,r9
- sub r2,r9
- mov r9,r2
- mov.l r2,@(8,r14)
-.L4:
- mov.l @(12,r14),r2
- neg r2,r1
- mov #3,r2
- and r1,r2
- mov.l r2,@(20,r14)
-.L7:
- mov.l @(20,r14),r1
- cmp/pl r1
- bt .L9
- bra .L6
- nop
- .align 2
-.L9:
- mov r14,r2
- mov r14,r1
- add #24,r1
- mov.l @(16,r14),r2
- mov.b @r2,r3
- mov.b r3,@r1
- mov.l @(16,r14),r1
- mov r1,r2
- add #1,r2
- mov.l r2,@(16,r14)
- mov.l @(20,r14),r1
- mov r1,r2
- add #-1,r2
- mov.l r2,@(20,r14)
- mov.l @(12,r14),r1
- mov r14,r2
- mov r14,r3
- add #24,r3
- mov.b @r3,r2
- mov.b r2,@r1
- mov.l @(12,r14),r1
- mov r1,r2
- add #1,r2
- mov.l r2,@(12,r14)
- bra .L7
- nop
- .align 2
-.L8:
-.L6:
- bra .L5
- nop
- .align 2
-.L10:
- bra .L4
- nop
- .align 2
-.L5:
- nop
-.L11:
- mov.l @(16,r14),r1
- mov #3,r2
- and r1,r2
- tst r2,r2
- bf .L14
- mov r15,r2
- mov.l @(12,r14),r1
- mov.l @(16,r14),r2
- mov.l @(8,r14),r7
- mov r7,r3
- shlr2 r3
- mov r1,r4
- mov r2,r5
- mov r3,r6
- mov.l .L46,r8
- jsr @r8
- nop
- bra .L15
- nop
- .align 2
-.L14:
- mov r15,r2
- mov.l @(12,r14),r1
- mov.l @(16,r14),r2
- mov.l @(8,r14),r7
- mov r7,r3
- shlr2 r3
- mov r1,r4
- mov r2,r5
- mov r3,r6
- mov.l .L47,r8
- jsr @r8
- nop
-.L15:
- mov.l @(8,r14),r1
- mov #-4,r2
- and r2,r1
- mov.l @(16,r14),r2
- add r2,r1
- mov.l r1,@(16,r14)
- mov.l @(8,r14),r1
- mov #-4,r2
- and r2,r1
- mov.l @(12,r14),r2
- add r2,r1
- mov.l r1,@(12,r14)
- mov.l @(8,r14),r1
- mov #3,r2
- and r1,r2
- mov.l r2,@(8,r14)
-.L13:
-.L52:
- bra .L3
- nop
- .align 2
-.L16:
- bra .L11
- nop
- .align 2
-.L12:
-.L3:
- nop
-.L17:
- mov.l @(8,r14),r1
- mov.l r1,@(20,r14)
-.L20:
- mov.l @(20,r14),r1
- cmp/pl r1
- bt .L22
- bra .L19
- nop
- .align 2
-.L22:
- mov r14,r2
- mov r14,r1
- add #24,r1
- mov.l @(16,r14),r2
- mov.b @r2,r3
- mov.b r3,@r1
- mov.l @(16,r14),r1
- mov r1,r2
- add #1,r2
- mov.l r2,@(16,r14)
- mov.l @(20,r14),r1
- mov r1,r2
- add #-1,r2
- mov.l r2,@(20,r14)
- mov.l @(12,r14),r1
- mov r14,r2
- mov r14,r3
- add #24,r3
- mov.b @r3,r2
- mov.b r2,@r1
- mov.l @(12,r14),r1
- mov r1,r2
- add #1,r2
- mov.l r2,@(12,r14)
- bra .L20
- nop
- .align 2
-.L21:
-.L19:
- bra .L18
- nop
- .align 2
-.L23:
- bra .L17
- nop
- .align 2
-.L18:
- bra .L24
- nop
- .align 2
-.L2:
- mov.l @(16,r14),r1
- mov.l @(8,r14),r2
- add r2,r1
- mov.l r1,@(16,r14)
- mov.l @(12,r14),r1
- mov.l @(8,r14),r2
- add r2,r1
- mov.l r1,@(12,r14)
- mov.l @(8,r14),r1
- mov #15,r2
- cmp/gt r2,r1
- bt .LF101
- bra .L53
- nop
-.LF101:
- mov.l @(12,r14),r1
- mov #3,r2
- and r1,r2
- mov.l @(8,r14),r1
- mov r1,r9
- sub r2,r9
- mov r9,r2
- mov.l r2,@(8,r14)
-.L26:
- mov.l @(12,r14),r1
- mov #3,r2
- and r1,r2
- mov.l r2,@(20,r14)
-.L29:
- mov.l @(20,r14),r1
- cmp/pl r1
- bt .L31
- bra .L28
- nop
- .align 2
-.L31:
- mov.l @(16,r14),r1
- mov r1,r2
- add #-1,r2
- mov.l r2,@(16,r14)
- mov r14,r2
- mov r14,r1
- add #24,r1
- mov.l @(16,r14),r2
- mov.b @r2,r3
- mov.b r3,@r1
- mov.l @(12,r14),r1
- mov r1,r2
- add #-1,r2
- mov.l r2,@(12,r14)
- mov.l @(20,r14),r1
- mov r1,r2
- add #-1,r2
- mov.l r2,@(20,r14)
- mov.l @(12,r14),r1
- mov r14,r2
- mov r14,r3
- add #24,r3
- mov.b @r3,r2
- mov.b r2,@r1
- bra .L29
- nop
- .align 2
-.L30:
-.L28:
- bra .L27
- nop
- .align 2
-.L32:
- bra .L26
- nop
- .align 2
-.L27:
- nop
-.L33:
- mov.l @(16,r14),r1
+ ! if dest > src, call memcpy (it copies in decreasing order)
+ cmp/hi r5,r4 ! T = (r4 > r5), unsigned: dst lies above src
+ bf 1f ! dst <= src: do the ascending copy below
+ mov.l 2f,r0 ! r0 = address of memcpy, loaded from the literal at 2:
+ jmp @r0 ! tail-jump: r4, r5, r6 are handed to memcpy unchanged
+ nop ! delay slot
+ .balign 4 ! the literal read by mov.l must be 4-byte aligned
+2: .long SYMBOL_NAME(memcpy)
+1:
+ sub r5,r4 ! From here, r4 = dst - src: add it to a source address to get its dst address
+ tst r6,r6
+ bt/s 9f ! if n=0, do nothing
+ mov r5,r0 ! delay slot (always runs): r0 = src, the running read pointer
+ add r6,r5 ! r5 = src + n, one past the last source byte
+ mov #12,r1
+ cmp/gt r6,r1 ! T = (12 > n), signed compare
+ bt/s 8f ! if it's too small, copy a byte at once
+ add #-1,r4 ! delay slot (always runs): r4 = dst - src - 1, as the byte loop at 8: expects
+ add #1,r4 ! branch not taken: undo the delay-slot adjustment, r4 = dst - src
+ !
+ ! [ ... ] DST [ ... ] SRC
+ ! [ ... ] [ ... ]
+ ! : :
+ ! r0+r4--> [ ... ] r0 --> [ ... ]
+ ! : :
+ ! [ ... ] [ ... ]
+ ! r5 -->
+ !
+ mov r4,r1 ! r1 = dst - src; its low two bits select the copy routine
mov #3,r2
- and r1,r2
- tst r2,r2
- bf .L36
- mov r15,r2
- mov.l @(12,r14),r1
- mov.l @(16,r14),r2
- mov.l @(8,r14),r7
- mov r7,r3
- shlr2 r3
- mov r1,r4
- mov r2,r5
- mov r3,r6
- mov.l .L48,r8
- jsr @r8
- nop
- bra .L37
- nop
- .align 2
-.L36:
- mov r15,r2
- mov.l @(12,r14),r1
- mov.l @(16,r14),r2
- mov.l @(8,r14),r7
- mov r7,r3
- shlr2 r3
- mov r1,r4
- mov r2,r5
- mov r3,r6
- mov.l .L49,r8
- jsr @r8
- nop
-.L37:
- mov.l @(8,r14),r1
- mov #-4,r2
and r2,r1
- mov.l @(16,r14),r2
- mov r2,r9
- sub r1,r9
- mov r9,r1
- mov.l r1,@(16,r14)
- mov.l @(8,r14),r1
- mov #-4,r2
- and r2,r1
- mov.l @(12,r14),r2
- mov r2,r9
- sub r1,r9
- mov r9,r1
- mov.l r1,@(12,r14)
- mov.l @(8,r14),r1
- mov #3,r2
- and r1,r2
- mov.l r2,@(8,r14)
-.L35:
-.L53:
- bra .L25
- nop
- .align 2
-.L38:
- bra .L33
- nop
- .align 2
-.L34:
-.L25:
- nop
-.L39:
- mov.l @(8,r14),r1
- mov.l r1,@(20,r14)
-.L42:
- mov.l @(20,r14),r1
- cmp/pl r1
- bt .L44
- bra .L41
- nop
- .align 2
-.L44:
- mov.l @(16,r14),r1
- mov r1,r2
- add #-1,r2
- mov.l r2,@(16,r14)
- mov r14,r2
- mov r14,r1
- add #24,r1
- mov.l @(16,r14),r2
- mov.b @r2,r3
- mov.b r3,@r1
- mov.l @(12,r14),r1
- mov r1,r2
- add #-1,r2
- mov.l r2,@(12,r14)
- mov.l @(20,r14),r1
- mov r1,r2
- add #-1,r2
- mov.l r2,@(20,r14)
- mov.l @(12,r14),r1
- mov r14,r2
- mov r14,r3
- add #24,r3
- mov.b @r3,r2
- mov.b r2,@r1
- bra .L42
- nop
- .align 2
-.L43:
-.L41:
- bra .L24
- nop
- .align 2
-.L45:
- bra .L39
- nop
- .align 2
-.L40:
-.L24:
- mov.l @r14,r1
- mov r1,r0
- bra .L1
- nop
- .align 2
-.L1:
- add #28,r14
- mov r14,r15
- lds.l @r15+,pr
- mov.l @r15+,r14
- mov.l @r15+,r9
- mov.l @r15+,r8
- rts
- nop
-.L50:
- .align 2
-.L46:
- .long __wordcopy_fwd_aligned
-.L47:
- .long __wordcopy_fwd_dest_aligned
-.L48:
- .long __wordcopy_bwd_aligned
-.L49:
- .long __wordcopy_bwd_dest_aligned
-.Lfe1:
+ shll2 r1 ! r1 = ((dst - src) & 3) * 4, a byte offset into jmptable
+ mov r0,r3 ! Save the value on R0 to R3
+ mova jmptable,r0 ! r0 = address of jmptable (mova always writes r0)
+ add r1,r0 ! r0 = address of the selected table entry
+ mov.l @r0,r1 ! r1 = address of the selected case routine
+ jmp @r1
+ mov r3,r0 ! and back to R0
+ .balign 4 ! table entries are read with mov.l, keep them 4-byte aligned
+jmptable:
+ .long case0 ! (dst - src) & 3 == 0
+ .long case1 ! (dst - src) & 3 == 1
+ .long case2 ! (dst - src) & 3 == 2
+ .long case3 ! (dst - src) & 3 == 3
+
+ ! copy a byte at once
+8: mov.b @r0+,r1 ! entry needs r4 = dst - src - 1, because the store below runs after r0 was bumped
+ cmp/hs r5,r0 ! T = (r0 >= r5), unsigned
+ bf/s 8b ! while (r0<r5)
+ mov.b r1,@(r0,r4) ! delay slot (always runs): store at (r0 - 1) + (dst - src)
+ add #1,r4 ! r4 = dst - src again
+9:
+ add r4,r0 ! common exit, entered with r4 = dst - src: r0 += dst - src
+ rts
+ sub r6,r0 ! delay slot: r0 -= n; with r0 = src + n (or src when n = 0) this returns dst
+
+case_none: ! NOTE(review): nothing in the visible code branches here and jmptable does not list it - confirm it is unused
+ bra 8b ! fall back to the byte loop
+ add #-1,r4 ! delay slot: r4 - 1, the offset form the byte loop at 8: expects
+
+case0:
+ !
+ ! GHIJ KLMN OPQR --> GHIJ KLMN OPQR
+ !
+ ! First, align to long word boundary
+ mov r0,r3
+ and r2,r3 ! r3 = r0 & 3 (r2 still holds 3 from the dispatch code)
+ tst r3,r3
+ bt/s 2f ! source pointer already long-word aligned
+ add #-1,r4 ! delay slot (always runs): r4 = dst - src - 1
+ mov #4,r2
+ sub r3,r2 ! r2 = 4 - (r0 & 3), the number of leading bytes to copy
+1: dt r2 ! r2 -= 1, T = (r2 == 0)
+ mov.b @r0+,r1
+ bf/s 1b ! T still holds the dt result: loop until r2 reaches 0
+ mov.b r1,@(r0,r4) ! delay slot: store at (r0 - 1) + (dst - src)
+ !
+2: ! Second, copy a long word at once
+ add #-3,r4 ! r4 = dst - src - 4: the store runs after r0 has advanced by 4
+ add #-3,r5 ! r5 = end - 3: r0 < r5 means a full long word remains
+3: mov.l @r0+,r1
+ cmp/hs r5,r0
+ bf/s 3b ! while (r0 < end - 3)
+ mov.l r1,@(r0,r4) ! delay slot: (dst - src) & 3 == 0 here, so the store is long-word aligned too
+ add #3,r5 ! r5 = end again
+ !
+ ! Third, copy a byte at once, if necessary
+ cmp/eq r5,r0
+ bt/s 9b ! nothing left: return through 9:
+ add #4,r4 ! delay slot (always runs): r4 = dst - src
+ bra 8b ! remaining tail bytes go through the byte loop
+ add #-1,r4 ! delay slot: r4 = dst - src - 1, as the byte loop expects
+
+case3:
+ !
+ ! GHIJ KLMN OPQR --> ...G HIJK LMNO PQR.
+ !
+ ! First, align to long word boundary
+ mov r0,r3
+ and r2,r3 ! r3 = r0 & 3 (r2 still holds 3 from the dispatch code)
+ tst r3,r3
+ bt/s 2f ! source pointer already long-word aligned
+ add #-1,r4 ! delay slot (always runs): r4 = dst - src - 1
+ mov #4,r2
+ sub r3,r2 ! r2 = 4 - (r0 & 3), the number of leading bytes to copy
+1: dt r2 ! r2 -= 1, T = (r2 == 0)
+ mov.b @r0+,r1
+ bf/s 1b ! T still holds the dt result: loop until r2 reaches 0
+ mov.b r1,@(r0,r4) ! delay slot: store at (r0 - 1) + (dst - src)
+ !
+2: ! Second, read a long word and write a long word at once
+ add #-2,r4 ! r4 = dst - src - 3; (dst - src) & 3 == 3 here, so r0 + r4 is long-word aligned
+ mov.l @(r0,r4),r1 ! dst long word whose highest-address byte is the next byte to write
+ add #-7,r5 ! r5 = end - 7, the loop bound
+ add #-4,r4 ! r4 = dst - src - 7: store address is (r0 after its increment) + r4
+ !
+#ifdef __LITTLE_ENDIAN__
+ shll8 r1 ! drop the byte about to be overwritten; the shlr8 below moves the kept bytes back
+3: mov r1,r3 ! JIHG
+ shlr8 r3 ! xJIH
+ mov.l @r0+,r1 ! NMLK
+ mov r1,r2
+ shll16 r2
+ shll8 r2 ! Kxxx
+ or r2,r3 ! KJIH
+ cmp/hs r5,r0
+ bf/s 3b ! while (r0 < end - 7)
+ mov.l r3,@(r0,r4) ! delay slot: 3 bytes of the previous word + 1 byte of the new one
+#else
+ shlr8 r1 ! drop the byte about to be overwritten; the shll8 below moves the kept bytes back
+3: mov r1,r3 ! GHIJ
+ shll8 r3 ! HIJx
+ mov.l @r0+,r1 ! KLMN
+ mov r1,r2
+ shlr16 r2
+ shlr8 r2 ! xxxK
+ or r2,r3 ! HIJK
+ cmp/hs r5,r0
+ bf/s 3b ! while (r0 < end - 7)
+ mov.l r3,@(r0,r4) ! delay slot: 3 bytes of the previous word + 1 byte of the new one
+#endif
+ add #7,r5 ! r5 = end again
+ !
+ ! Third, copy a byte at once, if necessary
+ cmp/eq r5,r0
+ bt/s 9b ! taken only when r0 == end
+ add #7,r4 ! delay slot (always runs): r4 = dst - src
+ add #-3,r0 ! 3 bytes of the last long word read are not stored yet: back up to them
+ bra 8b ! finish with the byte loop
+ add #-1,r4 ! delay slot: r4 = dst - src - 1, as the byte loop expects
+
+case2:
+ !
+ ! GHIJ KLMN OPQR --> ..GH IJKL MNOP QR..
+ !
+ ! First, align to word boundary
+ tst #1,r0 ! T = (r0 is even)
+ bt/s 2f ! source pointer already word aligned
+ add #-1,r4 ! delay slot (always runs): r4 = dst - src - 1
+ mov.b @r0+,r1 ! odd address: copy one byte to reach a word boundary
+ mov.b r1,@(r0,r4) ! store at (r0 - 1) + (dst - src)
+ !
+2: ! Second, read a word and write a word at once
+ add #-1,r4 ! r4 = dst - src - 2: the store runs after r0 has advanced by 2
+ add #-1,r5 ! r5 = end - 1: r0 < r5 means a full word remains
+ !
+3: mov.w @r0+,r1
+ cmp/hs r5,r0
+ bf/s 3b ! while (r0 < end - 1)
+ mov.w r1,@(r0,r4) ! delay slot: (dst - src) & 3 == 2 here, so the store is word aligned too
+ add #1,r5 ! r5 = end again
+ !
+ ! Third, copy a byte at once, if necessary
+ cmp/eq r5,r0
+ bt/s 9b ! nothing left: return through 9:
+ add #2,r4 ! delay slot (always runs): r4 = dst - src
+ mov.b @r0,r1 ! one trailing byte is left
+ mov.b r1,@(r0,r4) ! r0 is not advanced yet, so r0 + r4 is its dst address
+ bra 9b
+ add #1,r0 ! delay slot: r0 = end, the value the code at 9: turns into the return value
+
+case1:
+ !
+ ! GHIJ KLMN OPQR --> .GHI JKLM NOPQ R...
+ !
+ ! First, align to long word boundary
+ mov r0,r3
+ and r2,r3 ! r3 = r0 & 3 (r2 still holds 3 from the dispatch code)
+ tst r3,r3
+ bt/s 2f ! source pointer already long-word aligned
+ add #-1,r4 ! delay slot (always runs): r4 = dst - src - 1
+ mov #4,r2
+ sub r3,r2 ! r2 = 4 - (r0 & 3), the number of leading bytes to copy
+1: dt r2 ! r2 -= 1, T = (r2 == 0)
+ mov.b @r0+,r1
+ bf/s 1b ! T still holds the dt result: loop until r2 reaches 0
+ mov.b r1,@(r0,r4) ! delay slot: store at (r0 - 1) + (dst - src)
+ !
+2: ! Second, read a long word and write a long word at once
+ mov.l @(r0,r4),r1 ! dst long word whose lowest-address byte must be kept; (dst - src) & 3 == 1 makes r0 + r4 aligned
+ add #-7,r5 ! r5 = end - 7, the loop bound
+ add #-4,r4 ! r4 = dst - src - 5: store address is (r0 after its increment) + r4
+ !
+#ifdef __LITTLE_ENDIAN__
+ shll16 r1
+ shll8 r1 ! keep only the lowest-address byte, parked in the top 8 bits
+3: mov r1,r3 ! JIHG
+ shlr16 r3
+ shlr8 r3 ! xxxJ
+ mov.l @r0+,r1 ! NMLK
+ mov r1,r2
+ shll8 r2 ! MLKx
+ or r2,r3 ! MLKJ
+ cmp/hs r5,r0
+ bf/s 3b ! while (r0 < end - 7)
+ mov.l r3,@(r0,r4) ! delay slot: 1 byte of the previous word + 3 bytes of the new one
+#else
+ shlr16 r1
+ shlr8 r1 ! keep only the lowest-address byte, parked in the low 8 bits
+3: mov r1,r3 ! GHIJ
+ shll16 r3
+ shll8 r3 ! Jxxx
+ mov.l @r0+,r1 ! KLMN
+ mov r1,r2
+ shlr8 r2 ! xKLM
+ or r2,r3 ! JKLM
+ cmp/hs r5,r0
+ bf/s 3b ! while(r0<r5)
+ mov.l r3,@(r0,r4) ! delay slot: 1 byte of the previous word + 3 bytes of the new one
+#endif
+ add #7,r5 ! r5 = end again
+ !
+ ! Third, copy a byte at once, if necessary
+ cmp/eq r5,r0
+ bt/s 9b ! taken only when r0 == end
+ add #5,r4 ! delay slot (always runs): r4 = dst - src
+ add #-3,r0 ! NOTE(review): only 1 byte of the last word is pending; backing up 3 seems to re-copy 2 stored bytes (looks harmless) - confirm
+ bra 8b ! finish with the byte loop
+ add #-1,r4 ! delay slot: r4 = dst - src - 1, as the byte loop expects
FUNET's LINUX-ADM group, linux-adm@nic.funet.fi
TCL-scripts by Sam Shen (who was at: slshen@lbl.gov)