do_csum.S 2.82 KB
edit raw blame history



1

2

3

4

5

6

7

8

9

10

11

12

13

14

15

16

17

18

19

20

21

22

23

24

25

26

27

28

29

30

31

32

33

34

35

36

37

38

39

40

41

42

43

44

45

46

47

48

49

50

51

52

53

54

55

56

57

58

59

60

61

62

63

64

65

66

67

68

69

70

71

72

73

74

75

76

77

78

79

80

81

82

83

84

85

86

87

88

89

90

91

92

93

94

95

96

97

98

99

100

101

102

103

104

105

106

107

108

109

110

111

112

113

114

115

116

117

118

119

120

121

122

123

124

125

126

127

128

129

130

131

132

133

134

135

136

137

138

139

140

141

142

143

144

145

146

147

148

149

150

151

152

153

154

155

156

157


/* Optimised simple memory checksum
 *
 * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
 * Written by David Howells (dhowells@redhat.com)
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public Licence
 * as published by the Free Software Foundation; either version
 * 2 of the Licence, or (at your option) any later version.
 */
#include <asm/cache.h>

	.section .text
	.balign	L1_CACHE_BYTES

###############################################################################
#
# unsigned int do_csum(const unsigned char *buff, int len)
#
# Add up the LEN bytes at BUFF using 32-bit additions with the carry fed back
# into the sum, then fold the 32-bit total down to 16 bits and return it.
#
# On entry:	d0 = buff, d1 = len
# On exit:	d0 = folded 16-bit total (0 when len <= 0)
# Preserved:	d2, d3 (pushed by movm on entry, popped by ret)
# Scratch:	d1, a0, a1, e0, e1, e3 are overwritten and not restored
#
# Register roles in the body:
#	a0 = read cursor		a1 = copy of the original buff
#	d1 = 32-bit running sum		d2 = bytes still to be summed
#	d0, e0, e1, e3, d3 = temporaries (d3 also holds the loop stride)
#
###############################################################################
	.globl	do_csum
	.type	do_csum,@function
do_csum:
	movm	[d2,d3],(sp)			# save the registers that ret restores
	mov	d1,d2				# count
	mov	d0,a0				# buff
	mov	a0,a1				# keep the start address for the odd test at the end
	clr	d1				# accumulator

	cmp	+0,d2
	ble	do_csum_done			# check for zero length or negative

	# 4-byte align the buffer pointer
	btst	+3,a0				# either of the low two address bits set?
	beq	do_csum_now_4b_aligned

	btst	+1,a0				# address bit 0 set means an odd start
	beq	do_csum_addr_not_odd
	# odd start: take the leading byte on its own, placed in bits 15:8; the
	# byte swap applied before returning compensates for this placement
	movbu	(a0),d0
	inc	a0
	asl	+8,d0
	add	d0,d1
	add	-1,d2

do_csum_addr_not_odd:
	# a0 is even here; consume one halfword when address bit 1 is set
	cmp	+2,d2
	bcs	do_csum_fewer_than_4		# fewer than 2 bytes left
	btst	+2,a0
	beq	do_csum_now_4b_aligned
	movhu	(a0+),d0
	add	d0,d1
	add	-2,d2
	cmp	+4,d2
	bcs	do_csum_fewer_than_4		# not even one whole word left

do_csum_now_4b_aligned:
	# we want to checksum as much as we can in chunks of 32 bytes
	cmp	+31,d2
	bls	do_csum_remainder		# 4-byte aligned remainder

	# bias the count by one chunk so that the borrow out of the sub at the
	# bottom of the loop signals that fewer than 32 bytes remain
	add	-32,d2
	mov	+32,d3

do_csum_loop:
	# eight words per pass: the first add starts a carry chain that the
	# addc instructions continue across the second group of loads (so the
	# loads must leave the carry flag alone), and the closing addc of zero
	# feeds the last carry back into the sum
	mov	(a0+),d0
	mov	(a0+),e0
	mov	(a0+),e1
	mov	(a0+),e3
	add	d0,d1
	addc	e0,d1
	addc	e1,d1
	addc	e3,d1
	mov	(a0+),d0
	mov	(a0+),e0
	mov	(a0+),e1
	mov	(a0+),e3
	addc	d0,d1
	addc	e0,d1
	addc	e1,d1
	addc	e3,d1
	addc	+0,d1

	sub	d3,d2
	bcc	do_csum_loop			# no borrow: another full chunk is available

	add	d3,d2				# undo the bias: d2 = 0-31 bytes left
	beq	do_csum_done

do_csum_remainder:
	# cut 16-31 bytes down to 0-15
	cmp	+16,d2
	bcs	do_csum_fewer_than_16
	mov	(a0+),d0
	mov	(a0+),e0
	mov	(a0+),e1
	mov	(a0+),e3
	add	d0,d1
	addc	e0,d1
	addc	e1,d1
	addc	e3,d1
	addc	+0,d1				# feed the last carry back in
	add	-16,d2
	beq	do_csum_done

do_csum_fewer_than_16:
	# checksum the remaining whole words: enter the ladder below at the
	# point that leaves 3, 2 or 1 word loads to fall through.  d2 is not
	# decremented here; the and at do_csum_fewer_than_4 discards the
	# whole-word part of the count
	cmp	+4,d2
	bcs	do_csum_fewer_than_4
	cmp	+8,d2
	bcs	do_csum_one_word
	cmp	+12,d2
	bcs	do_csum_two_words
	mov	(a0+),d0
	add	d0,d1
	addc	+0,d1
do_csum_two_words:
	mov	(a0+),d0
	add	d0,d1
	addc	+0,d1
do_csum_one_word:
	mov	(a0+),d0
	add	d0,d1
	addc	+0,d1

do_csum_fewer_than_4:
	# 0-3 trailing bytes: d0 collects an optional halfword plus an optional
	# final byte, and is then added to the sum in one step
	and	+3,d2
	beq	do_csum_done
	xor_cmp	d0,d0,+2,d2			# zero d0 and compare d2 with 2 in one instruction
	bcs	do_csum_fewer_than_2		# exactly one byte left
	movhu	(a0+),d0
	and	+1,d2				# is a third byte still pending?
	beq	do_csum_add_last_bit
do_csum_fewer_than_2:
	movbu	(a0),d3
	add	d3,d0
do_csum_add_last_bit:
	add	d0,d1
	addc	+0,d1

do_csum_done:
	# compress the checksum down to 16 bits:
	#  - d0 keeps the high half of the sum in bits 31:16
	#  - d1 gets the low half of the sum shifted up to bits 31:16
	#  - adding them leaves the 16-bit total in bits 31:16 and its overflow
	#    in the carry flag
	#  - bits 15:0 are zero at that point, so adding 0xffff plus the carry
	#    bumps bit 16 only when the carry was set (end-around carry)
	#  - the final shift brings the folded total down to bits 15:0
	mov	+0xffff0000,d0
	and	d1,d0
	asl	+16,d1
	add	d1,d0
	addc	+0xffff,d0
	lsr	+16,d0

	# swap the two bytes of the 16-bit result when the buffer started on an
	# odd address (a1 still holds the original pointer)
	and	+1,a1
	beq	do_csum_not_oddly_aligned
	swaph	d0,d0				# exchange bits 15:8 with 7:0

do_csum_not_oddly_aligned:
	ret	[d2,d3],8			# pop d2 and d3 and return

	.size	do_csum, .-do_csum