Blame view

arch/frv/lib/memcpy.S 3.33 KB
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
  /* memcpy.S: optimised assembly memcpy
   *
   * Copyright (C) 2003 Red Hat, Inc. All Rights Reserved.
   * Written by David Howells (dhowells@redhat.com)
   *
   * This program is free software; you can redistribute it and/or
   * modify it under the terms of the GNU General Public License
   * as published by the Free Software Foundation; either version
   * 2 of the License, or (at your option) any later version.
   */
  
  
        .text
        .p2align	4

###############################################################################
#
# void *memcpy(void *to, const char *from, size_t count)
#
# - in:  GR8 = to, GR9 = from, GR10 = count
# - out: GR8 is never written, so it still holds "to" on return (the return
#        value required by the C prototype)
#
# - NOTE: must not use any stack. exception detection performs function return
#         to caller's fixup routine, aborting the remainder of the copy
#
# - the ".p" suffix packs the insn into a VLIW pair with the following insn;
#   it does not change the architectural result
#
###############################################################################
        .globl		memcpy,__memcpy_end
        .type		memcpy,@function
memcpy:
	or.p		gr8,gr9,gr4		; gr4 = to | from
	orcc		gr10,gr0,gr0,icc3	; set icc3 from count
	or.p		gr10,gr4,gr4		; gr4 = to | from | count
	beqlr		icc3,#0			; count == 0: return immediately

	# optimise based on best common alignment for to, from & count
	# (the low bits of gr4 show the widest transfer size that all three
	#  values share; gr11 is preloaded with the step for the chosen loop)
	andicc.p	gr4,#0x0f,gr0,icc0
	setlos		#8,gr11
	andicc.p	gr4,#0x07,gr0,icc1
	beq		icc0,#0,memcpy_16	; all 16-byte multiples
	andicc.p	gr4,#0x03,gr0,icc0
	beq		icc1,#0,memcpy_8	; all 8-byte multiples
	andicc.p	gr4,#0x01,gr0,icc1
	beq		icc0,#0,memcpy_4	; all 4-byte multiples
	setlos.p	#1,gr11
	beq		icc1,#0,memcpy_2	; all 2-byte multiples

	# do byte by byte copy
	# each loop below biases both pointers down by one step (gr11) so the
	# auto-update load/store forms advance the address by the step before
	# the first transfer, landing exactly on the true start addresses
	sub.p		gr8,gr11,gr3		; gr3 = to - step (dest cursor)
	sub		gr9,gr11,gr9		; gr9 = from - step (src cursor)
0:	ldubu.p		@(gr9,gr11),gr4
	subicc		gr10,#1,gr10,icc0	; count -= 1
	stbu.p		gr4,@(gr3,gr11)
	bne		icc0,#2,0b		; loop while count != 0
	bralr

	# do halfword by halfword copy
memcpy_2:
	setlos		#2,gr11			; step = 2
	sub.p		gr8,gr11,gr3
	sub		gr9,gr11,gr9
0:	lduhu.p		@(gr9,gr11),gr4
	subicc		gr10,#2,gr10,icc0	; count -= 2
	sthu.p		gr4,@(gr3,gr11)
	bne		icc0,#2,0b		; loop while count != 0
	bralr

	# do word by word copy
memcpy_4:
	setlos		#4,gr11			; step = 4
	sub.p		gr8,gr11,gr3
	sub		gr9,gr11,gr9
0:	ldu.p		@(gr9,gr11),gr4
	subicc		gr10,#4,gr10,icc0	; count -= 4
	stu.p		gr4,@(gr3,gr11)
	bne		icc0,#2,0b		; loop while count != 0
	bralr

	# do double-word by double-word copy
	# (gr11 was already set to 8 by the dispatch code above)
memcpy_8:
	sub.p		gr8,gr11,gr3
	sub		gr9,gr11,gr9
0:	lddu.p		@(gr9,gr11),gr4		; loads register pair gr4/gr5
	subicc		gr10,#8,gr10,icc0	; count -= 8
	stdu.p		gr4,@(gr3,gr11)
	bne		icc0,#2,0b		; loop while count != 0
	bralr

	# do quad-word by quad-word copy
	# (step remains 8; two double-word transfers move 16 bytes/iteration)
memcpy_16:
	sub.p		gr8,gr11,gr3
	sub		gr9,gr11,gr9
0:	lddu		@(gr9,gr11),gr4		; first pair:  gr4/gr5
	lddu.p		@(gr9,gr11),gr6		; second pair: gr6/gr7
	subicc		gr10,#16,gr10,icc0	; count -= 16
	stdu		gr4,@(gr3,gr11)
	stdu.p		gr6,@(gr3,gr11)
	bne		icc0,#2,0b		; loop while count != 0
	bralr
__memcpy_end:

	.size		memcpy, __memcpy_end-memcpy
  
###############################################################################
#
# copy to/from userspace
# - return the number of bytes that could not be copied (0 on complete success)
#
# long __memcpy_user(void *dst, const void *src, size_t count)
#
# - in: GR8 = dst, GR9 = src, GR10 = count (passed straight through to memcpy)
# - __memcpy_user_error_lr / __memcpy_user_error_handler are exported for the
#   exception-fixup machinery defined elsewhere; NOTE(review): the fault path
#   is presumed to enter at __memcpy_user_error_handler with memcpy's internal
#   registers (gr3, gr11) still live — confirm against the fixup tables
#
###############################################################################
        .globl		__memcpy_user, __memcpy_user_error_lr, __memcpy_user_error_handler
        .type		__memcpy_user,@function
__memcpy_user:
	movsg		lr,gr7			; gr7 = our own return address
	subi.p		sp,#8,sp		; open an 8-byte frame
	add		gr8,gr10,gr6		; calculate expected end address
	stdi		gr6,@(sp,#0)		; save pair { end addr (gr6), ret addr (gr7) }

	# abuse memcpy to do the dirty work
	call		memcpy
__memcpy_user_error_lr:
	# normal (fault-free) return path: everything was copied
	ldi.p		@(sp,#4),gr7		; reload saved return address
	setlos		#0,gr8			; return 0: nothing left uncopied
	jmpl.p		@(gr7,gr0)
	addi		sp,#8,sp		; pop the frame (paired with the jump)

	# deal with any exception generated by memcpy
	# GR3  - memcpy's dest cursor (stores above use EA = GR3 + GR11)
	# GR11 - memcpy's step value (index register for store insns)
__memcpy_user_error_handler:
	lddi.p		@(sp,#0),gr4		; load GR4 with dst+count, GR5 with ret addr
	add		gr11,gr3,gr7		; gr7 = current dest address in memcpy
	sub.p		gr4,gr7,gr8		; gr8 = (dst+count) - current = bytes uncopied

	addi		sp,#8,sp		; pop the frame
	jmpl		@(gr5,gr0)		; return to the saved caller address

	.size		__memcpy_user, .-__memcpy_user