Commit eea9507a69d637d52705de8703b168bf6bfe5643
1 parent
b53e906d25
Exists in
ti-lsk-linux-4.1.y
and in
10 other branches
nios2: Library functions
Add optimised library functions for nios2. Signed-off-by: Ley Foon Tan <lftan@altera.com>
Showing 5 changed files with 467 additions and 0 deletions Side-by-side Diff
arch/nios2/include/asm/checksum.h
1 | +/* | |
2 | + * Copyright (C) 2010 Tobias Klauser <tklauser@distanz.ch> | |
3 | + * Copyright (C) 2004 Microtronix Datacom Ltd. | |
4 | + * | |
5 | + * This file is subject to the terms and conditions of the GNU General Public | |
6 | + * License. See the file "COPYING" in the main directory of this archive | |
7 | + * for more details. | |
8 | + */ | |
9 | + | |
10 | +#ifndef _ASM_NIOS_CHECKSUM_H | |
11 | +#define _ASM_NIOS_CHECKSUM_H | |
12 | + | |
13 | +/* Take these from lib/checksum.c (they are only declared here, not defined in this header) */ | |
14 | +extern __wsum csum_partial(const void *buff, int len, __wsum sum); | |
15 | +extern __wsum csum_partial_copy(const void *src, void *dst, int len, | |
16 | + __wsum sum); | |
17 | +extern __wsum csum_partial_copy_from_user(const void __user *src, void *dst, | |
18 | + int len, __wsum sum, int *csum_err); /* the _nocheck macro that follows simply forwards its four arguments to csum_partial_copy() */ | |
19 | +#define csum_partial_copy_nocheck(src, dst, len, sum) \ | |
20 | + csum_partial_copy((src), (dst), (len), (sum)) | |
21 | + | |
22 | +extern __sum16 ip_fast_csum(const void *iph, unsigned int ihl); | |
23 | +extern __sum16 ip_compute_csum(const void *buff, int len); | |
24 | + | |
25 | +/* | |
26 | + * Fold a partial checksum: add the upper and lower halfwords of the 32-bit sum with end-around carry, invert the result, and return it cast to __sum16. Register r8 is used as scratch for the carry and is listed as clobbered. | |
27 | + */ | |
28 | +static inline __sum16 csum_fold(__wsum sum) | |
29 | +{ | |
30 | + __asm__ __volatile__( | |
31 | + "add %0, %1, %0\n" /* %0 = sum + (sum << 16): upper halfword now holds high + low */ | |
32 | + "cmpltu r8, %0, %1\n" /* r8 = 1 if that addition wrapped (unsigned result < addend) */ | |
33 | + "srli %0, %0, 16\n" /* bring the summed halfword down to bits 15..0 */ | |
34 | + "add %0, %0, r8\n" /* end-around carry */ | |
35 | + "nor %0, %0, %0\n" /* bitwise complement (x NOR x == ~x) */ | |
36 | + : "=r" (sum) | |
37 | + : "r" (sum << 16), "0" (sum) /* %1 = sum << 16; %0 starts out as sum (tied to the output) */ | |
38 | + : "r8"); | |
39 | + return (__force __sum16) sum; | |
40 | +} | |
41 | + | |
42 | +/* | |
43 | + * Accumulates the TCP/UDP pseudo-header fields into sum: saddr, daddr and one word built as (ntohs(len) << 16) + (proto * 256), each added with end-around carry. | |
44 | + * Returns the unfolded __wsum; nothing here folds or complements it (csum_tcpudp_magic() does that through csum_fold()). r8 is used as carry scratch and is listed as clobbered. | |
45 | + */ | |
46 | +#define csum_tcpudp_nofold csum_tcpudp_nofold | |
47 | +static inline __wsum csum_tcpudp_nofold(__be32 saddr, __be32 daddr, | |
48 | + unsigned short len, | |
49 | + unsigned short proto, | |
50 | + __wsum sum) | |
51 | +{ | |
52 | + __asm__ __volatile__( | |
53 | + "add %0, %1, %0\n" /* sum += saddr */ | |
54 | + "cmpltu r8, %0, %1\n" /* r8 = 1 if the addition wrapped */ | |
55 | + "add %0, %0, r8\n" /* add carry */ | |
56 | + "add %0, %2, %0\n" /* sum += daddr */ | |
57 | + "cmpltu r8, %0, %2\n" | |
58 | + "add %0, %0, r8\n" /* add carry */ | |
59 | + "add %0, %3, %0\n" /* sum += length/protocol word */ | |
60 | + "cmpltu r8, %0, %3\n" | |
61 | + "add %0, %0, r8\n" /* add carry */ | |
62 | + : "=r" (sum), "=r" (saddr) /* %0 = sum, %1 = saddr; none of the instructions above writes %1 */ | |
63 | + : "r" (daddr), "r" ((ntohs(len) << 16) + (proto * 256)), /* %2 = daddr, %3 = length/protocol word */ | |
64 | + "0" (sum), | |
65 | + "1" (saddr) | |
66 | + : "r8"); | |
67 | + | |
68 | + return sum; | |
69 | +} | |
70 | +/* Pseudo-header checksum in final form: csum_tcpudp_nofold() accumulates the fields and csum_fold() folds and complements the result. */ | |
71 | +static inline __sum16 csum_tcpudp_magic(__be32 saddr, __be32 daddr, | |
72 | + unsigned short len, | |
73 | + unsigned short proto, __wsum sum) | |
74 | +{ | |
75 | + return csum_fold(csum_tcpudp_nofold(saddr, daddr, len, proto, sum)); | |
76 | +} | |
77 | + | |
78 | +#endif /* _ASM_NIOS_CHECKSUM_H */ |
arch/nios2/include/asm/string.h
1 | +/* | |
2 | + * Copyright (C) 2004 Microtronix Datacom Ltd | |
3 | + * | |
4 | + * This file is subject to the terms and conditions of the GNU General Public | |
5 | + * License. See the file "COPYING" in the main directory of this archive | |
6 | + * for more details. | |
7 | + */ | |
8 | + | |
9 | +#ifndef _ASM_NIOS2_STRING_H | |
10 | +#define _ASM_NIOS2_STRING_H | |
11 | + | |
12 | +#ifdef __KERNEL__ | |
13 | + | |
14 | +#define __HAVE_ARCH_MEMSET /* tested with #ifdef by arch/nios2/lib/memset.c around its memset() definition */ | |
15 | +#define __HAVE_ARCH_MEMCPY | |
16 | +#define __HAVE_ARCH_MEMMOVE /* tested with #ifdef by arch/nios2/lib/memmove.c around its memmove() definition */ | |
17 | + | |
18 | +extern void *memset(void *s, int c, size_t count); /* defined in arch/nios2/lib/memset.c */ | |
19 | +extern void *memcpy(void *d, const void *s, size_t count); /* defined in arch/nios2/lib/memcpy.c */ | |
20 | +extern void *memmove(void *d, const void *s, size_t count); /* defined in arch/nios2/lib/memmove.c */ | |
21 | + | |
22 | +#endif /* __KERNEL__ */ | |
23 | + | |
24 | +#endif /* _ASM_NIOS2_STRING_H */ |
arch/nios2/lib/memcpy.c
1 | +/* Extracted from GLIBC memcpy.c and memcopy.h, which is: | |
2 | + Copyright (C) 1991, 1992, 1993, 1997, 2004 Free Software Foundation, Inc. | |
3 | + This file is part of the GNU C Library. | |
4 | + Contributed by Torbjorn Granlund (tege@sics.se). | |
5 | + | |
6 | + The GNU C Library is free software; you can redistribute it and/or | |
7 | + modify it under the terms of the GNU Lesser General Public | |
8 | + License as published by the Free Software Foundation; either | |
9 | + version 2.1 of the License, or (at your option) any later version. | |
10 | + | |
11 | + The GNU C Library is distributed in the hope that it will be useful, | |
12 | + but WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 | + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
14 | + Lesser General Public License for more details. | |
15 | + | |
16 | + You should have received a copy of the GNU Lesser General Public | |
17 | + License along with the GNU C Library; if not, see | |
18 | + <http://www.gnu.org/licenses/>. */ | |
19 | + | |
20 | +#include <linux/types.h> | |
21 | + | |
22 | +/* Type to use for aligned memory operations. | |
23 | + This should normally be the biggest type supported by a single load | |
24 | + and store. OPSIZ is its size in bytes. */ | |
25 | +#define op_t unsigned long int | |
26 | +#define OPSIZ (sizeof(op_t)) | |
27 | + | |
28 | +/* Optimal type for storing bytes in registers. (Not referenced by the code in this file.) */ | |
29 | +#define reg_char char | |
30 | + | |
31 | +#define MERGE(w0, sh_1, w1, sh_2) (((w0) >> (sh_1)) | ((w1) << (sh_2))) /* low bits come from the top of W0, high bits from the bottom of W1; shift counts are in bits */ | |
32 | + | |
33 | +/* Copy exactly NBYTES bytes from SRC_BP to DST_BP, | |
34 | + without any assumptions about alignment of the pointers. | |
 | + SRC_BP and DST_BP must be integer lvalues; both are advanced by the | |
 | + number of bytes copied. NBYTES is evaluated once, up front. */ | |
35 | +#define BYTE_COPY_FWD(dst_bp, src_bp, nbytes) \ | |
36 | +do { \ | |
37 | + size_t __nbytes = (nbytes); \ | |
38 | + while (__nbytes > 0) { \ | |
39 | + unsigned char __x = ((unsigned char *) src_bp)[0]; \ | |
40 | + src_bp += 1; \ | |
41 | + __nbytes -= 1; \ | |
42 | + ((unsigned char *) dst_bp)[0] = __x; \ | |
43 | + dst_bp += 1; \ | |
44 | + } \ | |
45 | +} while (0) | |
46 | + | |
47 | +/* Copy *up to* NBYTES bytes from SRC_BP to DST_BP, with | |
48 | + the assumption that DST_BP is aligned on an OPSIZ multiple. If | |
49 | + not all bytes could be easily copied, store remaining number of bytes | |
50 | + in NBYTES_LEFT, otherwise store 0. | |
 | + Whole words (NBYTES / OPSIZ of them) are copied by one of the two | |
 | + _wordcopy_fwd_* helpers, chosen by whether SRC_BP is OPSIZ-aligned; | |
 | + both pointers are then advanced by NBYTES rounded down to a multiple | |
 | + of OPSIZ. NBYTES is read before NBYTES_LEFT is written, so the same | |
 | + lvalue may be passed for both (memcpy does this). */ | |
51 | +/* extern void _wordcopy_fwd_aligned __P ((long int, long int, size_t)); */ | |
52 | +/* extern void _wordcopy_fwd_dest_aligned __P ((long int, long int, size_t)); */ | |
53 | +#define WORD_COPY_FWD(dst_bp, src_bp, nbytes_left, nbytes) \ | |
54 | +do { \ | |
55 | + if (src_bp % OPSIZ == 0) \ | |
56 | + _wordcopy_fwd_aligned(dst_bp, src_bp, (nbytes) / OPSIZ);\ | |
57 | + else \ | |
58 | + _wordcopy_fwd_dest_aligned(dst_bp, src_bp, (nbytes) / OPSIZ);\ | |
59 | + src_bp += (nbytes) & -OPSIZ; \ | |
60 | + dst_bp += (nbytes) & -OPSIZ; \ | |
61 | + (nbytes_left) = (nbytes) % OPSIZ; \ | |
62 | +} while (0) | |
63 | + | |
64 | + | |
65 | +/* Threshold value for when to enter the unrolled loops: memcpy only takes the word-copy path when len >= OP_T_THRES. */ | |
66 | +#define OP_T_THRES 16 | |
67 | + | |
68 | +/* _wordcopy_fwd_aligned -- Copy block beginning at SRCP to | |
69 | + block beginning at DSTP with LEN `op_t' words (not LEN bytes!). | |
70 | + Both SRCP and DSTP should be aligned for memory operations on `op_t's. | |
 | + SRCP and DSTP are addresses passed as integers. Nothing is returned. */ | |
71 | +/* stream-lined (read x8 + write x8) */ | |
72 | +static void _wordcopy_fwd_aligned(long int dstp, long int srcp, size_t len) | |
73 | +{ | |
74 | + while (len > 7) { /* main loop: eight words per iteration, all loads issued before any store */ | |
75 | + register op_t a0, a1, a2, a3, a4, a5, a6, a7; | |
76 | + | |
77 | + a0 = ((op_t *) srcp)[0]; | |
78 | + a1 = ((op_t *) srcp)[1]; | |
79 | + a2 = ((op_t *) srcp)[2]; | |
80 | + a3 = ((op_t *) srcp)[3]; | |
81 | + a4 = ((op_t *) srcp)[4]; | |
82 | + a5 = ((op_t *) srcp)[5]; | |
83 | + a6 = ((op_t *) srcp)[6]; | |
84 | + a7 = ((op_t *) srcp)[7]; | |
85 | + ((op_t *) dstp)[0] = a0; | |
86 | + ((op_t *) dstp)[1] = a1; | |
87 | + ((op_t *) dstp)[2] = a2; | |
88 | + ((op_t *) dstp)[3] = a3; | |
89 | + ((op_t *) dstp)[4] = a4; | |
90 | + ((op_t *) dstp)[5] = a5; | |
91 | + ((op_t *) dstp)[6] = a6; | |
92 | + ((op_t *) dstp)[7] = a7; | |
93 | + | |
94 | + srcp += 8 * OPSIZ; | |
95 | + dstp += 8 * OPSIZ; | |
96 | + len -= 8; | |
97 | + } | |
98 | + while (len > 0) { /* remaining 0..7 words, one at a time */ | |
99 | + *(op_t *)dstp = *(op_t *)srcp; | |
100 | + | |
101 | + srcp += OPSIZ; | |
102 | + dstp += OPSIZ; | |
103 | + len -= 1; | |
104 | + } | |
105 | +} | |
106 | + | |
107 | +/* _wordcopy_fwd_dest_aligned -- Copy block beginning at SRCP to | |
108 | + block beginning at DSTP with LEN `op_t' words (not LEN bytes!). | |
109 | + DSTP should be aligned for memory operations on `op_t's, but SRCP must | |
110 | + *not* be aligned. (With an aligned SRCP, sh_1 would be 0 and sh_2 the | |
 | + full width of op_t, making the left shift in MERGE invalid.) | |
 | + Only aligned words are loaded; each destination word is assembled from | |
 | + two neighbouring source words with MERGE. */ | |
111 | +/* stream-lined (read x4 + write x4) */ | |
112 | +static void _wordcopy_fwd_dest_aligned(long int dstp, long int srcp, | |
113 | + size_t len) | |
114 | +{ | |
115 | + op_t ap; | |
116 | + int sh_1, sh_2; | |
117 | + | |
118 | + /* Calculate how to shift a word read at the memory operation | |
119 | + aligned srcp to make it aligned for copy. */ | |
120 | + | |
121 | + sh_1 = 8 * (srcp % OPSIZ); /* misalignment of srcp in bits */ | |
122 | + sh_2 = 8 * OPSIZ - sh_1; /* the remaining bits of the word */ | |
123 | + | |
124 | + /* Make SRCP aligned by rounding it down to the beginning of the `op_t' | |
125 | + it points in the middle of. */ | |
126 | + srcp &= -OPSIZ; | |
127 | + ap = ((op_t *) srcp)[0]; /* ap always holds the previously loaded source word */ | |
128 | + srcp += OPSIZ; | |
129 | + | |
130 | + while (len > 3) { /* main loop: four words per iteration */ | |
131 | + op_t a0, a1, a2, a3; | |
132 | + | |
133 | + a0 = ((op_t *) srcp)[0]; | |
134 | + a1 = ((op_t *) srcp)[1]; | |
135 | + a2 = ((op_t *) srcp)[2]; | |
136 | + a3 = ((op_t *) srcp)[3]; | |
137 | + ((op_t *) dstp)[0] = MERGE(ap, sh_1, a0, sh_2); | |
138 | + ((op_t *) dstp)[1] = MERGE(a0, sh_1, a1, sh_2); | |
139 | + ((op_t *) dstp)[2] = MERGE(a1, sh_1, a2, sh_2); | |
140 | + ((op_t *) dstp)[3] = MERGE(a2, sh_1, a3, sh_2); | |
141 | + | |
142 | + ap = a3; /* carry the last loaded word into the next iteration */ | |
143 | + srcp += 4 * OPSIZ; | |
144 | + dstp += 4 * OPSIZ; | |
145 | + len -= 4; | |
146 | + } | |
147 | + while (len > 0) { /* remaining 0..3 words, one at a time */ | |
148 | + register op_t a0; | |
149 | + | |
150 | + a0 = ((op_t *) srcp)[0]; | |
151 | + ((op_t *) dstp)[0] = MERGE(ap, sh_1, a0, sh_2); | |
152 | + | |
153 | + ap = a0; | |
154 | + srcp += OPSIZ; | |
155 | + dstp += OPSIZ; | |
156 | + len -= 1; | |
157 | + } | |
158 | +} | |
159 | +/* memcpy -- copy LEN bytes from SRCPP to DSTPP in ascending address order and return DSTPP. Copies of OP_T_THRES bytes or more first byte-copy until DSTP is OPSIZ-aligned, then copy whole words, then byte-copy the tail; shorter copies are done byte by byte. */ | |
160 | +void *memcpy(void *dstpp, const void *srcpp, size_t len) | |
161 | +{ | |
162 | + unsigned long int dstp = (long int) dstpp; | |
163 | + unsigned long int srcp = (long int) srcpp; | |
164 | + | |
165 | + /* Copy from the beginning to the end. */ | |
166 | + | |
167 | + /* If there are not too few bytes to copy, use word copy. */ | |
168 | + if (len >= OP_T_THRES) { | |
169 | + /* Copy just a few bytes to make DSTP aligned. */ | |
170 | + len -= (-dstp) % OPSIZ; /* must be computed before BYTE_COPY_FWD advances dstp */ | |
171 | + BYTE_COPY_FWD(dstp, srcp, (-dstp) % OPSIZ); | |
172 | + | |
173 | + /* Copy whole pages from SRCP to DSTP by virtual address | |
174 | + manipulation, as much as possible. (Not done here: the | |
 | + macro call below is commented out.) */ | |
175 | + | |
176 | + /* PAGE_COPY_FWD_MAYBE (dstp, srcp, len, len); */ | |
177 | + | |
178 | + /* Copy from SRCP to DSTP taking advantage of the known | |
179 | + alignment of DSTP. Number of bytes remaining is put in the | |
180 | + third argument, i.e. in LEN. This number may vary from | |
181 | + machine to machine. */ | |
182 | + | |
183 | + WORD_COPY_FWD(dstp, srcp, len, len); | |
184 | + | |
185 | + /* Fall out and copy the tail. */ | |
186 | + } | |
187 | + | |
188 | + /* There are just a few bytes to copy. Use byte memory operations. */ | |
189 | + BYTE_COPY_FWD(dstp, srcp, len); | |
190 | + | |
191 | + return dstpp; | |
192 | +} | |
193 | +/* memcpyb -- copy LEN bytes from SRCPP to DSTPP strictly one byte at a time, in ascending address order, and return DSTPP. */ | |
194 | +void *memcpyb(void *dstpp, const void *srcpp, unsigned len) | |
195 | +{ | |
196 | + unsigned long int dstp = (long int) dstpp; | |
197 | + unsigned long int srcp = (long int) srcpp; | |
198 | + | |
199 | + BYTE_COPY_FWD(dstp, srcp, len); | |
200 | + | |
201 | + return dstpp; | |
202 | +} | |
arch/nios2/lib/memmove.c
1 | +/* | |
2 | + * Copyright (C) 2011 Tobias Klauser <tklauser@distanz.ch> | |
3 | + * Copyright (C) 2004 Microtronix Datacom Ltd | |
4 | + * | |
5 | + * This file is subject to the terms and conditions of the GNU General Public | |
6 | + * License. See the file "COPYING" in the main directory of this archive | |
7 | + * for more details. | |
8 | + */ | |
9 | + | |
10 | +#include <linux/types.h> | |
11 | +#include <linux/string.h> | |
12 | + | |
13 | +#ifdef __HAVE_ARCH_MEMMOVE | |
14 | +void *memmove(void *d, const void *s, size_t count) | |
15 | +{ | |
16 | + unsigned long dst, src; | |
17 | + /* Copies count bytes from s to d and returns d. When d < s the copy runs upwards from the start, otherwise downwards from the end, so source bytes are read before an overlapping destination overwrites them. Wider accesses are used only for count >= 8 with dst and src equally aligned modulo 4. */ | |
18 | + if (!count) | |
19 | + return d; | |
20 | + | |
21 | + if (d < s) { | |
22 | + dst = (unsigned long) d; | |
23 | + src = (unsigned long) s; | |
24 | + | |
25 | + if ((count < 8) || ((dst ^ src) & 3)) /* short copy, or the two low address bits differ: bytes only */ | |
26 | + goto restup; | |
27 | + | |
28 | + if (dst & 1) { /* one byte to reach a 2-byte boundary */ | |
29 | + *(char *)dst++ = *(char *)src++; | |
30 | + count--; | |
31 | + } | |
32 | + if (dst & 2) { /* one halfword to reach a 4-byte boundary */ | |
33 | + *(short *)dst = *(short *)src; | |
34 | + src += 2; | |
35 | + dst += 2; | |
36 | + count -= 2; | |
37 | + } | |
38 | + while (count > 3) { /* NOTE(review): the 4-byte stride assumes sizeof(long) == 4 on this target */ | |
39 | + *(long *)dst = *(long *)src; | |
40 | + src += 4; | |
41 | + dst += 4; | |
42 | + count -= 4; | |
43 | + } | |
44 | +restup: | |
45 | + while (count--) /* remaining bytes (or the whole copy on the byte-only path) */ | |
46 | + *(char *)dst++ = *(char *)src++; | |
47 | + } else { | |
48 | + dst = (unsigned long) d + count; /* both start one past the last byte and are pre-decremented */ | |
49 | + src = (unsigned long) s + count; | |
50 | + | |
51 | + if ((count < 8) || ((dst ^ src) & 3)) | |
52 | + goto restdown; | |
53 | + | |
54 | + if (dst & 1) { /* alignment is established on the end addresses here */ | |
55 | + src--; | |
56 | + dst--; | |
57 | + count--; | |
58 | + *(char *)dst = *(char *)src; | |
59 | + } | |
60 | + if (dst & 2) { | |
61 | + src -= 2; | |
62 | + dst -= 2; | |
63 | + count -= 2; | |
64 | + *(short *)dst = *(short *)src; | |
65 | + } | |
66 | + while (count > 3) { | |
67 | + src -= 4; | |
68 | + dst -= 4; | |
69 | + count -= 4; | |
70 | + *(long *)dst = *(long *)src; | |
71 | + } | |
72 | +restdown: | |
73 | + while (count--) { /* remaining bytes, still descending */ | |
74 | + src--; | |
75 | + dst--; | |
76 | + *(char *)dst = *(char *)src; | |
77 | + } | |
78 | + } | |
79 | + | |
80 | + return d; | |
81 | +} | |
82 | +#endif /* __HAVE_ARCH_MEMMOVE */ |
arch/nios2/lib/memset.c
1 | +/* | |
2 | + * Copyright (C) 2011 Tobias Klauser <tklauser@distanz.ch> | |
3 | + * Copyright (C) 2004 Microtronix Datacom Ltd | |
4 | + * | |
5 | + * This file is subject to the terms and conditions of the GNU General Public | |
6 | + * License. See the file "COPYING" in the main directory of this archive | |
7 | + * for more details. | |
8 | + */ | |
9 | + | |
10 | +#include <linux/types.h> | |
11 | +#include <linux/string.h> | |
12 | + | |
13 | +#ifdef __HAVE_ARCH_MEMSET | |
14 | +void *memset(void *s, int c, size_t count) | |
15 | +{ | |
16 | + int destptr, charcnt, dwordcnt, fill8reg, wrkrega; | |
17 | + /* Fills count bytes at s with the low 8 bits of c and returns s. Up to 8 bytes are written by a plain byte loop; longer fills align the pointer to 4 bytes, store 32-bit words, then store the 0..3 trailing bytes. */ | |
18 | + if (!count) | |
19 | + return s; | |
20 | + | |
21 | + c &= 0xFF; | |
22 | + | |
23 | + if (count <= 8) { | |
24 | + char *xs = (char *) s; | |
25 | + | |
26 | + while (count--) | |
27 | + *xs++ = c; | |
28 | + return s; | |
29 | + } | |
30 | + /* From here count >= 9. Alignment consumes at most 3 bytes, so at least one full word remains and the store loop at label 3 (which tests its counter only after storing) never runs with a zero count. */ | |
31 | + __asm__ __volatile__ ( | |
32 | + /* fill8 %3, %5 (c & 0xff): replicate the byte into all four bytes of %3 */ | |
33 | + " slli %4, %5, 8\n" | |
34 | + " or %4, %4, %5\n" | |
35 | + " slli %3, %4, 16\n" | |
36 | + " or %3, %3, %4\n" | |
37 | + /* Align %0 (s) to 2 bytes if necessary */ | |
38 | + " andi %4, %0, 0x01\n" | |
39 | + " beq %4, zero, 1f\n" | |
40 | + " addi %1, %1, -1\n" | |
41 | + " stb %3, 0(%0)\n" | |
42 | + " addi %0, %0, 1\n" | |
43 | + "1: mov %2, %1\n" | |
44 | + /* Align %0 (s) to 4 bytes if necessary */ | |
45 | + " andi %4, %0, 0x02\n" | |
46 | + " beq %4, zero, 2f\n" | |
47 | + " addi %1, %1, -2\n" | |
48 | + " sth %3, 0(%0)\n" | |
49 | + " addi %0, %0, 2\n" | |
50 | + " mov %2, %1\n" | |
51 | + /* %1 and %2 are how many more bytes to set */ | |
52 | + "2: srli %2, %2, 2\n" | |
53 | + /* %2 is how many 32-bit words to set */ | |
54 | + "3: stw %3, 0(%0)\n" | |
55 | + " addi %0, %0, 4\n" | |
56 | + " addi %2, %2, -1\n" | |
57 | + " bne %2, zero, 3b\n" | |
58 | + /* store residual halfword if necessary (bit 1 of the remaining count) */ | |
59 | + " andi %4, %1, 0x02\n" | |
60 | + " beq %4, zero, 4f\n" | |
61 | + " sth %3, 0(%0)\n" | |
62 | + " addi %0, %0, 2\n" | |
63 | + /* store residual byte if necessary */ | |
64 | + "4: andi %4, %1, 0x01\n" | |
65 | + " beq %4, zero, 5f\n" | |
66 | + " stb %3, 0(%0)\n" | |
67 | + "5:\n" | |
68 | + : "=r" (destptr), /* %0 Output */ | |
69 | + "=r" (charcnt), /* %1 Output */ | |
70 | + "=r" (dwordcnt), /* %2 Output */ | |
71 | + "=r" (fill8reg), /* %3 Output */ | |
72 | + "=&r" (wrkrega) /* %4 Output; early-clobber: %4 is written by the first instruction while %5 is still read by the second, so the two must not share a register */ | |
73 | + : "r" (c), /* %5 Input */ | |
74 | + "0" (s), /* %0 Input/Output */ | |
75 | + "1" (count) /* %1 Input/Output */ | |
76 | + : "memory" /* clobbered */ | |
77 | + ); | |
78 | + | |
79 | + return s; | |
80 | +} | |
81 | +#endif /* __HAVE_ARCH_MEMSET */ |