Commit 16008d641670571ff4cd750b416c7caf2d89f467
Exists in master and in 6 other branches
Merge branch 'for-3.3/drivers' of git://git.kernel.dk/linux-block
* 'for-3.3/drivers' of git://git.kernel.dk/linux-block:
  mtip32xx: do rebuild monitoring asynchronously
  xen-blkfront: Use kcalloc instead of kzalloc to allocate array
  mtip32xx: uninitialized variable in mtip_quiesce_io()
  mtip32xx: updates based on feedback
  xen-blkback: convert hole punching to discard request on loop devices
  xen/blkback: Move processing of BLKIF_OP_DISCARD from dispatch_rw_block_io
  xen/blk[front|back]: Enhance discard support with secure erasing support.
  xen/blk[front|back]: Squash blkif_request_rw and blkif_request_discard together
  mtip32xx: update to new ->make_request() API
  mtip32xx: add module.h include to avoid conflict with moduleh tree
  mtip32xx: mark a few more items static
  mtip32xx: ensure that all local functions are static
  mtip32xx: cleanup compat ioctl handling
  mtip32xx: fix warnings/errors on 32-bit compiles
  block: Add driver for Micron RealSSD pcie flash cards
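One of the merged patches ("xen-blkfront: Use kcalloc instead of kzalloc to allocate array") swaps an open-coded size multiplication for kcalloc(). As a minimal sketch of why that matters (this is a generic illustration, not the actual xen-blkfront change; the helper name and parameters are hypothetical): kcalloc() zeroes the allocation like kzalloc(), but also checks the count * size multiplication for overflow and returns NULL instead of handing back a silently truncated buffer.

	#include <linux/slab.h>

	/* Hypothetical helper, for illustration only. */
	static void *alloc_entry_array(size_t nr_entries, size_t entry_size)
	{
		/*
		 * kzalloc(nr_entries * entry_size, GFP_KERNEL) would also
		 * return zeroed memory, but the multiplication can overflow
		 * without any indication.
		 *
		 * kcalloc() performs the same zeroed allocation and returns
		 * NULL if nr_entries * entry_size would overflow.
		 */
		return kcalloc(nr_entries, entry_size, GFP_KERNEL);
	}

For array allocations this is the preferred idiom, since the overflow check comes for free and the call site documents that an array, not a single buffer, is being allocated.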
Showing 11 changed files
- drivers/block/Kconfig
- drivers/block/Makefile
- drivers/block/mtip32xx/Kconfig
- drivers/block/mtip32xx/Makefile
- drivers/block/mtip32xx/mtip32xx.c
- drivers/block/mtip32xx/mtip32xx.h
- drivers/block/xen-blkback/blkback.c
- drivers/block/xen-blkback/common.h
- drivers/block/xen-blkback/xenbus.c
- drivers/block/xen-blkfront.c
- include/xen/interface/io/blkif.h
drivers/block/Kconfig
#
# Block device driver configuration
#

menuconfig BLK_DEV
	bool "Block devices"
	depends on BLOCK
	default y
	---help---
	  Say Y here to get to see options for various different block device
	  drivers. This option alone does not add any kernel code.

	  If you say N, all options in this submenu will be skipped and disabled;
	  only do this if you know what you are doing.

if BLK_DEV

config BLK_DEV_FD
	tristate "Normal floppy disk support"
	depends on ARCH_MAY_HAVE_PC_FDC
	---help---
	  If you want to use the floppy disk drive(s) of your PC under Linux,
	  say Y. Information about this driver, especially important for IBM
	  Thinkpad users, is contained in
	  <file:Documentation/blockdev/floppy.txt>.
	  That file also contains the location of the Floppy driver FAQ as
	  well as location of the fdutils package used to configure additional
	  parameters of the driver at run time.

	  To compile this driver as a module, choose M here: the
	  module will be called floppy.

config AMIGA_FLOPPY
	tristate "Amiga floppy support"
	depends on AMIGA

config ATARI_FLOPPY
	tristate "Atari floppy support"
	depends on ATARI

config MAC_FLOPPY
	tristate "Support for PowerMac floppy"
	depends on PPC_PMAC && !PPC_PMAC64
	help
	  If you have a SWIM-3 (Super Woz Integrated Machine 3; from Apple)
	  floppy controller, say Y here. Most commonly found in PowerMacs.

config BLK_DEV_SWIM
	tristate "Support for SWIM Macintosh floppy"
	depends on M68K && MAC
	help
	  You should select this option if you want floppy support
	  and you don't have a II, IIfx, Q900, Q950 or AV series.

config AMIGA_Z2RAM
	tristate "Amiga Zorro II ramdisk support"
	depends on ZORRO
	help
	  This enables support for using Chip RAM and Zorro II RAM as a
	  ramdisk or as a swap partition. Say Y if you want to include this
	  driver in the kernel.

	  To compile this driver as a module, choose M here: the
	  module will be called z2ram.

config BLK_DEV_XD
	tristate "XT hard disk support"
	depends on ISA && ISA_DMA_API
	select CHECK_SIGNATURE
	help
	  Very old 8 bit hard disk controllers used in the IBM XT computer
	  will be supported if you say Y here.

	  To compile this driver as a module, choose M here: the
	  module will be called xd.

	  It's pretty unlikely that you have one of these: say N.

config GDROM
	tristate "SEGA Dreamcast GD-ROM drive"
	depends on SH_DREAMCAST
	help
	  A standard SEGA Dreamcast comes with a modified CD ROM drive called a
	  "GD-ROM" by SEGA to signify it is capable of reading special disks
	  with up to 1 GB of data. This drive will also read standard CD ROM
	  disks. Select this option to access any disks in your GD ROM drive.
	  Most users will want to say "Y" here.
	  You can also build this as a module which will be called gdrom.

config PARIDE
	tristate "Parallel port IDE device support"
	depends on PARPORT_PC
	---help---
	  There are many external CD-ROM and disk devices that connect through
	  your computer's parallel port. Most of them are actually IDE devices
	  using a parallel port IDE adapter. This option enables the PARIDE
	  subsystem which contains drivers for many of these external drives.
	  Read <file:Documentation/blockdev/paride.txt> for more information.

	  If you have said Y to the "Parallel-port support" configuration
	  option, you may share a single port between your printer and other
	  parallel port devices. Answer Y to build PARIDE support into your
	  kernel, or M if you would like to build it as a loadable module. If
	  your parallel port support is in a loadable module, you must build
	  PARIDE as a module. If you built PARIDE support into your kernel,
	  you may still build the individual protocol modules and high-level
	  drivers as loadable modules. If you build this support as a module,
	  it will be called paride.

	  To use the PARIDE support, you must say Y or M here and also to at
	  least one high-level driver (e.g. "Parallel port IDE disks",
	  "Parallel port ATAPI CD-ROMs", "Parallel port ATAPI disks" etc.) and
	  to at least one protocol driver (e.g. "ATEN EH-100 protocol",
	  "MicroSolutions backpack protocol", "DataStor Commuter protocol"
	  etc.).

source "drivers/block/paride/Kconfig"

+source "drivers/block/mtip32xx/Kconfig"
+
config BLK_CPQ_DA
	tristate "Compaq SMART2 support"
	depends on PCI && VIRT_TO_BUS
	help
	  This is the driver for Compaq Smart Array controllers. Everyone
	  using these boards should say Y here. See the file
	  <file:Documentation/blockdev/cpqarray.txt> for the current list of
	  boards supported by this driver, and for further information on the
	  use of this driver.

config BLK_CPQ_CISS_DA
	tristate "Compaq Smart Array 5xxx support"
	depends on PCI
	help
	  This is the driver for Compaq Smart Array 5xxx controllers.
	  Everyone using these boards should say Y here.
	  See <file:Documentation/blockdev/cciss.txt> for the current list of
	  boards supported by this driver, and for further information
	  on the use of this driver.

config CISS_SCSI_TAPE
	bool "SCSI tape drive support for Smart Array 5xxx"
	depends on BLK_CPQ_CISS_DA && PROC_FS
	depends on SCSI=y || SCSI=BLK_CPQ_CISS_DA
	help
	  When enabled (Y), this option allows SCSI tape drives and SCSI medium
	  changers (tape robots) to be accessed via a Compaq 5xxx array
	  controller. (See <file:Documentation/blockdev/cciss.txt> for more details.)

	  "SCSI support" and "SCSI tape support" must also be enabled for this
	  option to work.

	  When this option is disabled (N), the SCSI portion of the driver
	  is not compiled.

config BLK_DEV_DAC960
	tristate "Mylex DAC960/DAC1100 PCI RAID Controller support"
	depends on PCI
	help
	  This driver adds support for the Mylex DAC960, AcceleRAID, and
	  eXtremeRAID PCI RAID controllers. See the file
	  <file:Documentation/blockdev/README.DAC960> for further information
	  about this driver.

	  To compile this driver as a module, choose M here: the
	  module will be called DAC960.

config BLK_DEV_UMEM
	tristate "Micro Memory MM5415 Battery Backed RAM support (EXPERIMENTAL)"
	depends on PCI && EXPERIMENTAL
	---help---
	  Saying Y here will include support for the MM5415 family of
	  battery backed (Non-volatile) RAM cards.
	  <http://www.umem.com/>

	  The cards appear as block devices that can be partitioned into
	  as many as 15 partitions.

	  To compile this driver as a module, choose M here: the
	  module will be called umem.

	  The umem driver has not yet been allocated a MAJOR number, so
	  one is chosen dynamically.

config BLK_DEV_UBD
	bool "Virtual block device"
	depends on UML
	---help---
	  The User-Mode Linux port includes a driver called UBD which will let
	  you access arbitrary files on the host computer as block devices.
	  Unless you know that you do not need such virtual block devices say
	  Y here.

config BLK_DEV_UBD_SYNC
	bool "Always do synchronous disk IO for UBD"
	depends on BLK_DEV_UBD
	---help---
	  Writes to the virtual block device are not immediately written to the
	  host's disk; this may cause problems if, for example, the User-Mode
	  Linux 'Virtual Machine' uses a journalling filesystem and the host
	  computer crashes.

	  Synchronous operation (i.e. always writing data to the host's disk
	  immediately) is configurable on a per-UBD basis by using a special
	  kernel command line option. Alternatively, you can say Y here to
	  turn on synchronous operation by default for all block devices.

	  If you're running a journalling file system (like reiserfs, for
	  example) in your virtual machine, you will want to say Y here. If
	  you care for the safety of the data in your virtual machine, Y is a
	  wise choice too. In all other cases (for example, if you're just
	  playing around with User-Mode Linux) you can choose N.

config BLK_DEV_COW_COMMON
	bool
	default BLK_DEV_UBD

config BLK_DEV_LOOP
	tristate "Loopback device support"
	---help---
	  Saying Y here will allow you to use a regular file as a block
	  device; you can then create a file system on that block device and
	  mount it just as you would mount other block devices such as hard
	  drive partitions, CD-ROM drives or floppy drives. The loop devices
	  are block special device files with major number 7 and typically
	  called /dev/loop0, /dev/loop1 etc.

	  This is useful if you want to check an ISO 9660 file system before
	  burning the CD, or if you want to use floppy images without first
	  writing them to floppy. Furthermore, some Linux distributions avoid
	  the need for a dedicated Linux partition by keeping their complete
	  root file system inside a DOS FAT file using this loop device
	  driver.

	  To use the loop device, you need the losetup utility, found in the
	  util-linux package, see
	  <ftp://ftp.kernel.org/pub/linux/utils/util-linux/>.

	  The loop device driver can also be used to "hide" a file system in
	  a disk partition, floppy, or regular file, either using encryption
	  (scrambling the data) or steganography (hiding the data in the low
	  bits of, say, a sound file). This is also safe if the file resides
	  on a remote file server.

	  There are several ways of encrypting disks. Some of these require
	  kernel patches. The vanilla kernel offers the cryptoloop option
	  and a Device Mapper target (which is superior, as it supports all
	  file systems). If you want to use the cryptoloop, say Y to both
	  LOOP and CRYPTOLOOP, and make sure you have a recent (version 2.12
	  or later) version of util-linux. Additionally, be aware that
	  the cryptoloop is not safe for storing journaled filesystems.

	  Note that this loop device has nothing to do with the loopback
	  device used for network connections from the machine to itself.

	  To compile this driver as a module, choose M here: the
	  module will be called loop.

	  Most users will answer N here.

config BLK_DEV_LOOP_MIN_COUNT
	int "Number of loop devices to pre-create at init time"
	depends on BLK_DEV_LOOP
	default 8
	help
	  Static number of loop devices to be unconditionally pre-created
	  at init time.

	  This default value can be overwritten on the kernel command
	  line or with module-parameter loop.max_loop.

	  The historic default is 8. If a late 2011 version of losetup(8)
	  is used, it can be set to 0, since needed loop devices can be
	  dynamically allocated with the /dev/loop-control interface.

config BLK_DEV_CRYPTOLOOP
	tristate "Cryptoloop Support"
	select CRYPTO
	select CRYPTO_CBC
	depends on BLK_DEV_LOOP
	---help---
	  Say Y here if you want to be able to use the ciphers that are
	  provided by the CryptoAPI as loop transformation. This might be
	  used as hard disk encryption.

	  WARNING: This device is not safe for journaled file systems like
	  ext3 or Reiserfs. Please use the Device Mapper crypto module
	  instead, which can be configured to be on-disk compatible with the
	  cryptoloop device.

source "drivers/block/drbd/Kconfig"

config BLK_DEV_NBD
	tristate "Network block device support"
	depends on NET
	---help---
	  Saying Y here will allow your computer to be a client for network
	  block devices, i.e. it will be able to use block devices exported by
	  servers (mount file systems on them etc.). Communication between
	  client and server works over TCP/IP networking, but to the client
	  program this is hidden: it looks like a regular local file access to
	  a block device special file such as /dev/nd0.

	  Network block devices also allow you to run a block-device in
	  userland (making server and client physically the same computer,
	  communicating using the loopback network device).

	  Read <file:Documentation/blockdev/nbd.txt> for more information,
	  especially about where to find the server code, which runs in user
	  space and does not need special kernel support.

	  Note that this has nothing to do with the network file systems NFS
	  or Coda; you can say N here even if you intend to use NFS or Coda.

	  To compile this driver as a module, choose M here: the
	  module will be called nbd.

	  If unsure, say N.

config BLK_DEV_OSD
	tristate "OSD object-as-blkdev support"
	depends on SCSI_OSD_ULD
	---help---
	  Saying Y or M here will allow the exporting of a single SCSI
	  OSD (object-based storage) object as a Linux block device.

	  For example, if you create a 2G object on an OSD device,
	  you can then use this module to present that 2G object as
	  a Linux block device.

	  To compile this driver as a module, choose M here: the
	  module will be called osdblk.

	  If unsure, say N.

config BLK_DEV_SX8
	tristate "Promise SATA SX8 support"
	depends on PCI
	---help---
	  Saying Y or M here will enable support for the
	  Promise SATA SX8 controllers.

	  Use devices /dev/sx8/$N and /dev/sx8/$Np$M.

config BLK_DEV_UB
	tristate "Low Performance USB Block driver"
	depends on USB
	help
	  This driver supports certain USB attached storage devices
	  such as flash keys.

	  If you enable this driver, it is recommended to avoid conflicts
	  with usb-storage by enabling USB_LIBUSUAL.

	  If unsure, say N.

config BLK_DEV_RAM
	tristate "RAM block device support"
	---help---
	  Saying Y here will allow you to use a portion of your RAM memory as
	  a block device, so that you can make file systems on it, read and
	  write to it and do all the other things that you can do with normal
	  block devices (such as hard drives). It is usually used to load and
	  store a copy of a minimal root file system off of a floppy into RAM
	  during the initial install of Linux.

	  Note that the kernel command line option "ramdisk=XX" is now obsolete.
	  For details, read <file:Documentation/blockdev/ramdisk.txt>.

	  To compile this driver as a module, choose M here: the
	  module will be called rd.

	  Most normal users won't need the RAM disk functionality, and can
	  thus say N here.

config BLK_DEV_RAM_COUNT
	int "Default number of RAM disks"
	default "16"
	depends on BLK_DEV_RAM
	help
	  The default value is 16 RAM disks. Change this if you know what you
	  are doing. If you boot from a filesystem that needs to be extracted
	  in memory, you will need at least one RAM disk (e.g. root on cramfs).

config BLK_DEV_RAM_SIZE
	int "Default RAM disk size (kbytes)"
	depends on BLK_DEV_RAM
	default "4096"
	help
	  The default value is 4096 kilobytes. Only change this if you know
	  what you are doing.

config BLK_DEV_XIP
	bool "Support XIP filesystems on RAM block device"
	depends on BLK_DEV_RAM
	default n
	help
	  Support XIP filesystems (such as ext2 with XIP support on) on
	  top of block ram device. This will slightly enlarge the kernel, and
	  will prevent RAM block device backing store memory from being
	  allocated from highmem (only a problem for highmem systems).

config CDROM_PKTCDVD
	tristate "Packet writing on CD/DVD media"
	depends on !UML
	help
	  If you have a CDROM/DVD drive that supports packet writing, say
	  Y to include support. It should work with any MMC/Mt Fuji
	  compliant ATAPI or SCSI drive, which is just about any newer
	  DVD/CD writer.

	  Currently only writing to CD-RW, DVD-RW, DVD+RW and DVDRAM discs
	  is possible.
	  DVD-RW disks must be in restricted overwrite mode.

	  See the file <file:Documentation/cdrom/packet-writing.txt>
	  for further information on the use of this driver.

	  To compile this driver as a module, choose M here: the
	  module will be called pktcdvd.

config CDROM_PKTCDVD_BUFFERS
	int "Free buffers for data gathering"
	depends on CDROM_PKTCDVD
	default "8"
	help
	  This controls the maximum number of active concurrent packets. More
	  concurrent packets can increase write performance, but also require
	  more memory. Each concurrent packet will require approximately 64Kb
	  of non-swappable kernel memory, memory which will be allocated when
	  a disc is opened for writing.

config CDROM_PKTCDVD_WCACHE
	bool "Enable write caching (EXPERIMENTAL)"
	depends on CDROM_PKTCDVD && EXPERIMENTAL
	help
	  If enabled, write caching will be set for the CD-R/W device. For now
	  this option is dangerous unless the CD-RW media is known good, as we
	  don't do deferred write error handling yet.

config ATA_OVER_ETH
	tristate "ATA over Ethernet support"
	depends on NET
	help
	  This driver provides support for ATA over Ethernet block
	  devices like the Coraid EtherDrive (R) Storage Blade.

config MG_DISK
	tristate "mGine mflash, gflash support"
	depends on ARM && GPIOLIB
	help
	  mGine mFlash(gFlash) block device driver

config MG_DISK_RES
	int "Size of reserved area before MBR"
	depends on MG_DISK
	default 0
	help
	  Define the size of the reserved area usually used for boot. The
	  unit is KB. All block device operations will use this value as
	  the start offset.
	  Examples:
	    1024 => 1 MB

config SUNVDC
	tristate "Sun Virtual Disk Client support"
	depends on SUN_LDOMS
	help
	  Support for virtual disk devices as a client under Sun
	  Logical Domains.

source "drivers/s390/block/Kconfig"

config XILINX_SYSACE
	tristate "Xilinx SystemACE support"
	depends on 4xx || MICROBLAZE
	help
	  Include support for the Xilinx SystemACE CompactFlash interface

config XEN_BLKDEV_FRONTEND
	tristate "Xen virtual block device support"
	depends on XEN
	default y
	select XEN_XENBUS_FRONTEND
	help
	  This driver implements the front-end of the Xen virtual
	  block device driver. It communicates with a back-end driver
	  in another domain which drives the actual block device.

config XEN_BLKDEV_BACKEND
	tristate "Xen block-device backend driver"
	depends on XEN_BACKEND
	help
	  The block-device backend driver allows the kernel to export its
	  block devices to other guests via a high-performance shared-memory
	  interface.

	  The corresponding Linux frontend driver is enabled by the
	  CONFIG_XEN_BLKDEV_FRONTEND configuration option.

	  The backend driver attaches itself to any block device specified
	  in the XenBus configuration. There are no limits to what the block
	  device can be, as long as it has a major and minor number.

	  If you are compiling a kernel to run in a Xen block backend driver
	  domain (often this is domain 0) you should say Y here. To
	  compile this driver as a module, choose M here: the module
	  will be called xen-blkback.


config VIRTIO_BLK
	tristate "Virtio block driver (EXPERIMENTAL)"
	depends on EXPERIMENTAL && VIRTIO
	---help---
	  This is the virtual block driver for virtio. It can be used with
	  lguest or QEMU based VMMs (like KVM or Xen). Say Y or M.

config BLK_DEV_HD
	bool "Very old hard disk (MFM/RLL/IDE) driver"
	depends on HAVE_IDE
	depends on !ARM || ARCH_RPC || ARCH_SHARK || BROKEN
	help
	  This is a very old hard disk driver that lacks the enhanced
	  functionality of the newer ones.

	  It is required for systems with ancient MFM/RLL/ESDI drives.

	  If unsure, say N.

config BLK_DEV_RBD
	tristate "Rados block device (RBD)"
	depends on INET && EXPERIMENTAL && BLOCK
	select CEPH_LIB
	select LIBCRC32C
	select CRYPTO_AES
	select CRYPTO
	default n
	help
	  Say Y here if you want to include the Rados block device, which
	  stripes a block device over objects stored in the Ceph distributed
	  object store.

	  More information at http://ceph.newdream.net/.

	  If unsure, say N.

endif # BLK_DEV
drivers/block/Makefile
#
# Makefile for the kernel block device drivers.
#
# 12 June 2000, Christoph Hellwig <hch@infradead.org>
# Rewritten to use lists instead of if-statements.
#

obj-$(CONFIG_MAC_FLOPPY) += swim3.o
obj-$(CONFIG_BLK_DEV_SWIM) += swim_mod.o
obj-$(CONFIG_BLK_DEV_FD) += floppy.o
obj-$(CONFIG_AMIGA_FLOPPY) += amiflop.o
obj-$(CONFIG_PS3_DISK) += ps3disk.o
obj-$(CONFIG_PS3_VRAM) += ps3vram.o
obj-$(CONFIG_ATARI_FLOPPY) += ataflop.o
obj-$(CONFIG_AMIGA_Z2RAM) += z2ram.o
obj-$(CONFIG_BLK_DEV_RAM) += brd.o
obj-$(CONFIG_BLK_DEV_LOOP) += loop.o
obj-$(CONFIG_BLK_DEV_XD) += xd.o
obj-$(CONFIG_BLK_CPQ_DA) += cpqarray.o
obj-$(CONFIG_BLK_CPQ_CISS_DA) += cciss.o
obj-$(CONFIG_BLK_DEV_DAC960) += DAC960.o
obj-$(CONFIG_XILINX_SYSACE) += xsysace.o
obj-$(CONFIG_CDROM_PKTCDVD) += pktcdvd.o
obj-$(CONFIG_MG_DISK) += mg_disk.o
obj-$(CONFIG_SUNVDC) += sunvdc.o
obj-$(CONFIG_BLK_DEV_OSD) += osdblk.o

obj-$(CONFIG_BLK_DEV_UMEM) += umem.o
obj-$(CONFIG_BLK_DEV_NBD) += nbd.o
obj-$(CONFIG_BLK_DEV_CRYPTOLOOP) += cryptoloop.o
obj-$(CONFIG_VIRTIO_BLK) += virtio_blk.o

obj-$(CONFIG_VIODASD) += viodasd.o
obj-$(CONFIG_BLK_DEV_SX8) += sx8.o
obj-$(CONFIG_BLK_DEV_UB) += ub.o
obj-$(CONFIG_BLK_DEV_HD) += hd.o

obj-$(CONFIG_XEN_BLKDEV_FRONTEND) += xen-blkfront.o
obj-$(CONFIG_XEN_BLKDEV_BACKEND) += xen-blkback/
obj-$(CONFIG_BLK_DEV_DRBD) += drbd/
obj-$(CONFIG_BLK_DEV_RBD) += rbd.o
+obj-$(CONFIG_BLK_DEV_PCIESSD_MTIP32XX) += mtip32xx/

swim_mod-y := swim.o swim_asm.o
drivers/block/mtip32xx/Kconfig
File was created

#
# mtip32xx device driver configuration
#

config BLK_DEV_PCIESSD_MTIP32XX
	tristate "Block Device Driver for Micron PCIe SSDs"
	depends on HOTPLUG_PCI_PCIE
	help
	  This enables the block driver for Micron PCIe SSDs.
drivers/block/mtip32xx/Makefile
File was created

#
# Makefile for Block device driver for Micron PCIe SSD
#

obj-$(CONFIG_BLK_DEV_PCIESSD_MTIP32XX) += mtip32xx.o
drivers/block/mtip32xx/mtip32xx.c
File was created | 1 | /* | |
2 | * Driver for the Micron P320 SSD | ||
3 | * Copyright (C) 2011 Micron Technology, Inc. | ||
4 | * | ||
5 | * Portions of this code were derived from works subjected to the | ||
6 | * following copyright: | ||
7 | * Copyright (C) 2009 Integrated Device Technology, Inc. | ||
8 | * | ||
9 | * This program is free software; you can redistribute it and/or modify | ||
10 | * it under the terms of the GNU General Public License as published by | ||
11 | * the Free Software Foundation; either version 2 of the License, or | ||
12 | * (at your option) any later version. | ||
13 | * | ||
14 | * This program is distributed in the hope that it will be useful, | ||
15 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
16 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
17 | * GNU General Public License for more details. | ||
18 | * | ||
19 | */ | ||
20 | |||
21 | #include <linux/pci.h> | ||
22 | #include <linux/interrupt.h> | ||
23 | #include <linux/ata.h> | ||
24 | #include <linux/delay.h> | ||
25 | #include <linux/hdreg.h> | ||
26 | #include <linux/uaccess.h> | ||
27 | #include <linux/random.h> | ||
28 | #include <linux/smp.h> | ||
29 | #include <linux/compat.h> | ||
30 | #include <linux/fs.h> | ||
31 | #include <linux/module.h> | ||
32 | #include <linux/genhd.h> | ||
33 | #include <linux/blkdev.h> | ||
34 | #include <linux/bio.h> | ||
35 | #include <linux/dma-mapping.h> | ||
36 | #include <linux/idr.h> | ||
37 | #include <linux/kthread.h> | ||
38 | #include <../drivers/ata/ahci.h> | ||
39 | #include "mtip32xx.h" | ||
40 | |||
41 | #define HW_CMD_SLOT_SZ (MTIP_MAX_COMMAND_SLOTS * 32) | ||
42 | #define HW_CMD_TBL_SZ (AHCI_CMD_TBL_HDR_SZ + (MTIP_MAX_SG * 16)) | ||
43 | #define HW_CMD_TBL_AR_SZ (HW_CMD_TBL_SZ * MTIP_MAX_COMMAND_SLOTS) | ||
44 | #define HW_PORT_PRIV_DMA_SZ \ | ||
45 | (HW_CMD_SLOT_SZ + HW_CMD_TBL_AR_SZ + AHCI_RX_FIS_SZ) | ||
46 | |||
47 | #define HOST_HSORG 0xFC | ||
48 | #define HSORG_DISABLE_SLOTGRP_INTR (1<<24) | ||
49 | #define HSORG_DISABLE_SLOTGRP_PXIS (1<<16) | ||
50 | #define HSORG_HWREV 0xFF00 | ||
51 | #define HSORG_STYLE 0x8 | ||
52 | #define HSORG_SLOTGROUPS 0x7 | ||
53 | |||
54 | #define PORT_COMMAND_ISSUE 0x38 | ||
55 | #define PORT_SDBV 0x7C | ||
56 | |||
57 | #define PORT_OFFSET 0x100 | ||
58 | #define PORT_MEM_SIZE 0x80 | ||
59 | |||
60 | #define PORT_IRQ_ERR \ | ||
61 | (PORT_IRQ_HBUS_ERR | PORT_IRQ_IF_ERR | PORT_IRQ_CONNECT | \ | ||
62 | PORT_IRQ_PHYRDY | PORT_IRQ_UNK_FIS | PORT_IRQ_BAD_PMP | \ | ||
63 | PORT_IRQ_TF_ERR | PORT_IRQ_HBUS_DATA_ERR | PORT_IRQ_IF_NONFATAL | \ | ||
64 | PORT_IRQ_OVERFLOW) | ||
65 | #define PORT_IRQ_LEGACY \ | ||
66 | (PORT_IRQ_PIOS_FIS | PORT_IRQ_D2H_REG_FIS) | ||
67 | #define PORT_IRQ_HANDLED \ | ||
68 | (PORT_IRQ_SDB_FIS | PORT_IRQ_LEGACY | \ | ||
69 | PORT_IRQ_TF_ERR | PORT_IRQ_IF_ERR | \ | ||
70 | PORT_IRQ_CONNECT | PORT_IRQ_PHYRDY) | ||
71 | #define DEF_PORT_IRQ \ | ||
72 | (PORT_IRQ_ERR | PORT_IRQ_LEGACY | PORT_IRQ_SDB_FIS) | ||
73 | |||
74 | /* product numbers */ | ||
75 | #define MTIP_PRODUCT_UNKNOWN 0x00 | ||
76 | #define MTIP_PRODUCT_ASICFPGA 0x11 | ||
77 | |||
78 | /* Device instance number, incremented each time a device is probed. */ | ||
79 | static int instance; | ||
80 | |||
81 | /* | ||
82 | * Global variable used to hold the major block device number | ||
83 | * allocated in mtip_init(). | ||
84 | */ | ||
85 | static int mtip_major; | ||
86 | |||
87 | static DEFINE_SPINLOCK(rssd_index_lock); | ||
88 | static DEFINE_IDA(rssd_index_ida); | ||
89 | |||
90 | static int mtip_block_initialize(struct driver_data *dd); | ||
91 | |||
92 | #ifdef CONFIG_COMPAT | ||
93 | struct mtip_compat_ide_task_request_s { | ||
94 | __u8 io_ports[8]; | ||
95 | __u8 hob_ports[8]; | ||
96 | ide_reg_valid_t out_flags; | ||
97 | ide_reg_valid_t in_flags; | ||
98 | int data_phase; | ||
99 | int req_cmd; | ||
100 | compat_ulong_t out_size; | ||
101 | compat_ulong_t in_size; | ||
102 | }; | ||
103 | #endif | ||
104 | |||
105 | /* | ||
106 | * This function check_for_surprise_removal is called | ||
107 | * while card is removed from the system and it will | ||
108 | * read the vendor id from the configration space | ||
109 | * | ||
110 | * @pdev Pointer to the pci_dev structure. | ||
111 | * | ||
112 | * return value | ||
113 | * true if device removed, else false | ||
114 | */ | ||
115 | static bool mtip_check_surprise_removal(struct pci_dev *pdev) | ||
116 | { | ||
117 | u16 vendor_id = 0; | ||
118 | |||
119 | /* Read the vendorID from the configuration space */ | ||
120 | pci_read_config_word(pdev, 0x00, &vendor_id); | ||
121 | if (vendor_id == 0xFFFF) | ||
122 | return true; /* device removed */ | ||
123 | |||
124 | return false; /* device present */ | ||
125 | } | ||
126 | |||
127 | /* | ||
128 | * This function is called for clean the pending command in the | ||
129 | * command slot during the surprise removal of device and return | ||
130 | * error to the upper layer. | ||
131 | * | ||
132 | * @dd Pointer to the DRIVER_DATA structure. | ||
133 | * | ||
134 | * return value | ||
135 | * None | ||
136 | */ | ||
137 | static void mtip_command_cleanup(struct driver_data *dd) | ||
138 | { | ||
139 | int group = 0, commandslot = 0, commandindex = 0; | ||
140 | struct mtip_cmd *command; | ||
141 | struct mtip_port *port = dd->port; | ||
142 | |||
143 | for (group = 0; group < 4; group++) { | ||
144 | for (commandslot = 0; commandslot < 32; commandslot++) { | ||
145 | if (!(port->allocated[group] & (1 << commandslot))) | ||
146 | continue; | ||
147 | |||
148 | commandindex = group << 5 | commandslot; | ||
149 | command = &port->commands[commandindex]; | ||
150 | |||
151 | if (atomic_read(&command->active) | ||
152 | && (command->async_callback)) { | ||
153 | command->async_callback(command->async_data, | ||
154 | -ENODEV); | ||
155 | command->async_callback = NULL; | ||
156 | command->async_data = NULL; | ||
157 | } | ||
158 | |||
159 | dma_unmap_sg(&port->dd->pdev->dev, | ||
160 | command->sg, | ||
161 | command->scatter_ents, | ||
162 | command->direction); | ||
163 | } | ||
164 | } | ||
165 | |||
166 | up(&port->cmd_slot); | ||
167 | |||
168 | atomic_set(&dd->drv_cleanup_done, true); | ||
169 | } | ||
170 | |||
171 | /* | ||
172 | * Obtain an empty command slot. | ||
173 | * | ||
174 | * This function needs to be reentrant since it could be called | ||
175 | * at the same time on multiple CPUs. The allocation of the | ||
176 | * command slot must be atomic. | ||
177 | * | ||
178 | * @port Pointer to the port data structure. | ||
179 | * | ||
180 | * return value | ||
181 | * >= 0 Index of command slot obtained. | ||
182 | * -1 No command slots available. | ||
183 | */ | ||
184 | static int get_slot(struct mtip_port *port) | ||
185 | { | ||
186 | int slot, i; | ||
187 | unsigned int num_command_slots = port->dd->slot_groups * 32; | ||
188 | |||
189 | /* | ||
190 | * Try 10 times; there is a small race here, but that is fine | ||
191 | * because retrying is still cheaper than taking a lock. | ||
192 | * | ||
193 | * Race: since this section is not protected by a lock, the same | ||
194 | * bit could be chosen by process contexts running concurrently | ||
195 | * on different processors. Rather than pay for a costly lock, we | ||
196 | * retry in a loop. | ||
197 | */ | ||
198 | for (i = 0; i < 10; i++) { | ||
199 | slot = find_next_zero_bit(port->allocated, | ||
200 | num_command_slots, 1); | ||
201 | if ((slot < num_command_slots) && | ||
202 | (!test_and_set_bit(slot, port->allocated))) | ||
203 | return slot; | ||
204 | } | ||
205 | dev_warn(&port->dd->pdev->dev, "Failed to get a tag.\n"); | ||
206 | |||
207 | if (mtip_check_surprise_removal(port->dd->pdev)) { | ||
208 | /* Device not present, clean outstanding commands */ | ||
209 | mtip_command_cleanup(port->dd); | ||
210 | } | ||
211 | return -1; | ||
212 | } | ||
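| |||
| /* | ||
|  * Editorial sketch (not driver code; the helper name is invented): | ||
|  * the intended pairing of get_slot()/release_slot() with the | ||
|  * cmd_slot counting semaphore, inferred from the up(&port->cmd_slot) | ||
|  * calls in the cleanup/completion paths elsewhere in this file. | ||
|  */ | ||
| #if 0 | ||
| static int example_claim_tag(struct mtip_port *port) | ||
| { | ||
| 	int tag; | ||
| |||
| 	down(&port->cmd_slot);	/* reserve capacity for one command */ | ||
| 	tag = get_slot(port);	/* then claim a specific free bit */ | ||
| 	if (tag < 0) { | ||
| 		up(&port->cmd_slot);	/* nothing was allocated */ | ||
| 		return -EBUSY; | ||
| 	} | ||
| 	return tag;	/* later paired with release_slot() + up() */ | ||
| } | ||
| #endif | ||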
213 | |||
214 | /* | ||
215 | * Release a command slot. | ||
216 | * | ||
217 | * @port Pointer to the port data structure. | ||
218 | * @tag Tag of command to release | ||
219 | * | ||
220 | * return value | ||
221 | * None | ||
222 | */ | ||
223 | static inline void release_slot(struct mtip_port *port, int tag) | ||
224 | { | ||
225 | smp_mb__before_clear_bit(); | ||
226 | clear_bit(tag, port->allocated); | ||
227 | smp_mb__after_clear_bit(); | ||
228 | } | ||
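| |||
| /* | ||
|  * Editorial note: the smp_mb barrier pair around clear_bit() above | ||
|  * orders earlier writes to the command ahead of the tag release, so | ||
|  * a CPU that later wins test_and_set_bit() in get_slot() observes a | ||
|  * fully retired slot. | ||
|  */ | ||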
229 | |||
230 | /* | ||
231 | * Reset the HBA (without sleeping) | ||
232 | * | ||
233 | * Just like mtip_hba_reset(), except it does not sleep, so it can | ||
234 | * be run from interrupt/tasklet context. | ||
235 | * | ||
236 | * @dd Pointer to the driver data structure. | ||
237 | * | ||
238 | * return value | ||
239 | * 0 The reset was successful. | ||
240 | * -1 The HBA Reset bit did not clear. | ||
241 | */ | ||
242 | static int hba_reset_nosleep(struct driver_data *dd) | ||
243 | { | ||
244 | unsigned long timeout; | ||
245 | |||
246 | /* Chip quirk: quiesce any chip function */ | ||
247 | mdelay(10); | ||
248 | |||
249 | /* Set the reset bit */ | ||
250 | writel(HOST_RESET, dd->mmio + HOST_CTL); | ||
251 | |||
252 | /* Flush */ | ||
253 | readl(dd->mmio + HOST_CTL); | ||
254 | |||
255 | /* | ||
256 | * Wait 10ms then spin for up to 1 second | ||
257 | * waiting for reset acknowledgement | ||
258 | */ | ||
259 | timeout = jiffies + msecs_to_jiffies(1000); | ||
260 | mdelay(10); | ||
261 | while ((readl(dd->mmio + HOST_CTL) & HOST_RESET) | ||
262 | && time_before(jiffies, timeout)) | ||
263 | mdelay(1); | ||
264 | |||
265 | if (readl(dd->mmio + HOST_CTL) & HOST_RESET) | ||
266 | return -1; | ||
267 | |||
268 | return 0; | ||
269 | } | ||
270 | |||
271 | /* | ||
272 | * Issue a command to the hardware. | ||
273 | * | ||
274 | * Set the appropriate bit in the s_active and Command Issue hardware | ||
275 | * registers, causing hardware command processing to begin. | ||
276 | * | ||
277 | * @port Pointer to the port structure. | ||
278 | * @tag The tag of the command to be issued. | ||
279 | * | ||
280 | * return value | ||
281 | * None | ||
282 | */ | ||
283 | static inline void mtip_issue_ncq_command(struct mtip_port *port, int tag) | ||
284 | { | ||
285 | unsigned long flags = 0; | ||
286 | |||
287 | atomic_set(&port->commands[tag].active, 1); | ||
288 | |||
289 | spin_lock_irqsave(&port->cmd_issue_lock, flags); | ||
290 | |||
291 | writel((1 << MTIP_TAG_BIT(tag)), | ||
292 | port->s_active[MTIP_TAG_INDEX(tag)]); | ||
293 | writel((1 << MTIP_TAG_BIT(tag)), | ||
294 | port->cmd_issue[MTIP_TAG_INDEX(tag)]); | ||
295 | |||
296 | spin_unlock_irqrestore(&port->cmd_issue_lock, flags); | ||
297 | } | ||
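| |||
| /* | ||
|  * Editorial example: consistent with the group = tag >> 5, | ||
|  * bit = tag & 0x1F decomposition used elsewhere in this file, | ||
|  * tag 37 maps to MTIP_TAG_INDEX(37) = 1 and MTIP_TAG_BIT(37) = 5, | ||
|  * i.e. bit 5 of s_active[1] and cmd_issue[1]. | ||
|  */ | ||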
298 | |||
299 | /* | ||
300 | * Enable/disable the reception of FIS | ||
301 | * | ||
302 | * @port Pointer to the port data structure | ||
303 | * @enable 1 to enable, 0 to disable | ||
304 | * | ||
305 | * return value | ||
306 | * Previous state: 1 enabled, 0 disabled | ||
307 | */ | ||
308 | static int mtip_enable_fis(struct mtip_port *port, int enable) | ||
309 | { | ||
310 | u32 tmp; | ||
311 | |||
312 | /* enable FIS reception */ | ||
313 | tmp = readl(port->mmio + PORT_CMD); | ||
314 | if (enable) | ||
315 | writel(tmp | PORT_CMD_FIS_RX, port->mmio + PORT_CMD); | ||
316 | else | ||
317 | writel(tmp & ~PORT_CMD_FIS_RX, port->mmio + PORT_CMD); | ||
318 | |||
319 | /* Flush */ | ||
320 | readl(port->mmio + PORT_CMD); | ||
321 | |||
322 | return (((tmp & PORT_CMD_FIS_RX) == PORT_CMD_FIS_RX)); | ||
323 | } | ||
324 | |||
325 | /* | ||
326 | * Enable/disable the DMA engine | ||
327 | * | ||
328 | * @port Pointer to the port data structure | ||
329 | * @enable 1 to enable, 0 to disable | ||
330 | * | ||
331 | * return value | ||
332 | * Previous state: 1 enabled, 0 disabled. | ||
333 | */ | ||
334 | static int mtip_enable_engine(struct mtip_port *port, int enable) | ||
335 | { | ||
336 | u32 tmp; | ||
337 | |||
338 | /* start/stop the DMA engine */ | ||
339 | tmp = readl(port->mmio + PORT_CMD); | ||
340 | if (enable) | ||
341 | writel(tmp | PORT_CMD_START, port->mmio + PORT_CMD); | ||
342 | else | ||
343 | writel(tmp & ~PORT_CMD_START, port->mmio + PORT_CMD); | ||
344 | |||
345 | readl(port->mmio + PORT_CMD); | ||
346 | return (((tmp & PORT_CMD_START) == PORT_CMD_START)); | ||
347 | } | ||
348 | |||
349 | /* | ||
350 | * Enables the port DMA engine and FIS reception. | ||
351 | * | ||
352 | * @port Pointer to the port structure. | ||
353 | * | ||
354 | * return value | ||
| * None | ||
| */ | ||
355 | static inline void mtip_start_port(struct mtip_port *port) | ||
356 | { | ||
357 | /* Enable FIS reception */ | ||
358 | mtip_enable_fis(port, 1); | ||
359 | |||
360 | /* Enable the DMA engine */ | ||
361 | mtip_enable_engine(port, 1); | ||
362 | } | ||
363 | |||
364 | /* | ||
365 | * Deinitialize a port by disabling port interrupts, the DMA engine, | ||
366 | * and FIS reception. | ||
367 | * | ||
368 | * @port Pointer to the port structure | ||
369 | * | ||
370 | * return value | ||
371 | * None | ||
372 | */ | ||
373 | static inline void mtip_deinit_port(struct mtip_port *port) | ||
374 | { | ||
375 | /* Disable interrupts on this port */ | ||
376 | writel(0, port->mmio + PORT_IRQ_MASK); | ||
377 | |||
378 | /* Disable the DMA engine */ | ||
379 | mtip_enable_engine(port, 0); | ||
380 | |||
381 | /* Disable FIS reception */ | ||
382 | mtip_enable_fis(port, 0); | ||
383 | } | ||
384 | |||
385 | /* | ||
386 | * Initialize a port. | ||
387 | * | ||
388 | * This function deinitializes the port by calling mtip_deinit_port() and | ||
389 | * then initializes it by setting the command header and RX FIS addresses, | ||
390 | * clearing the SError register and any pending port interrupts before | ||
391 | * re-enabling the default set of port interrupts. | ||
392 | * | ||
393 | * @port Pointer to the port structure. | ||
394 | * | ||
395 | * return value | ||
396 | * None | ||
397 | */ | ||
398 | static void mtip_init_port(struct mtip_port *port) | ||
399 | { | ||
400 | int i; | ||
401 | mtip_deinit_port(port); | ||
402 | |||
403 | /* Program the command list base and FIS base addresses */ | ||
404 | if (readl(port->dd->mmio + HOST_CAP) & HOST_CAP_64) { | ||
405 | writel((port->command_list_dma >> 16) >> 16, | ||
406 | port->mmio + PORT_LST_ADDR_HI); | ||
407 | writel((port->rxfis_dma >> 16) >> 16, | ||
408 | port->mmio + PORT_FIS_ADDR_HI); | ||
409 | } | ||
410 | |||
411 | writel(port->command_list_dma & 0xFFFFFFFF, | ||
412 | port->mmio + PORT_LST_ADDR); | ||
413 | writel(port->rxfis_dma & 0xFFFFFFFF, port->mmio + PORT_FIS_ADDR); | ||
414 | |||
415 | /* Clear SError */ | ||
416 | writel(readl(port->mmio + PORT_SCR_ERR), port->mmio + PORT_SCR_ERR); | ||
417 | |||
418 | /* reset the completed registers.*/ | ||
419 | for (i = 0; i < port->dd->slot_groups; i++) | ||
420 | writel(0xFFFFFFFF, port->completed[i]); | ||
421 | |||
422 | /* Clear any pending interrupts for this port */ | ||
423 | writel(readl(port->mmio + PORT_IRQ_STAT), port->mmio + PORT_IRQ_STAT); | ||
424 | |||
425 | /* Enable port interrupts */ | ||
426 | writel(DEF_PORT_IRQ, port->mmio + PORT_IRQ_MASK); | ||
427 | } | ||
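| |||
| /* | ||
|  * Editorial note: the (addr >> 16) >> 16 idiom above extracts the | ||
|  * upper 32 bits without undefined behaviour when dma_addr_t is only | ||
|  * 32 bits wide, where a single >> 32 shift would be illegal C. | ||
|  */ | ||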
428 | |||
429 | /* | ||
430 | * Restart a port | ||
431 | * | ||
432 | * @port Pointer to the port data structure. | ||
433 | * | ||
434 | * return value | ||
435 | * None | ||
436 | */ | ||
437 | static void mtip_restart_port(struct mtip_port *port) | ||
438 | { | ||
439 | unsigned long timeout; | ||
440 | |||
441 | /* Disable the DMA engine */ | ||
442 | mtip_enable_engine(port, 0); | ||
443 | |||
444 | /* Chip quirk: wait up to 500ms for PxCMD.CR == 0 */ | ||
445 | timeout = jiffies + msecs_to_jiffies(500); | ||
446 | while ((readl(port->mmio + PORT_CMD) & PORT_CMD_LIST_ON) | ||
447 | && time_before(jiffies, timeout)) | ||
448 | ; | ||
449 | |||
450 | /* | ||
451 | * Chip quirk: escalate to hba reset if | ||
452 | * PxCMD.CR not clear after 500 ms | ||
453 | */ | ||
454 | if (readl(port->mmio + PORT_CMD) & PORT_CMD_LIST_ON) { | ||
455 | dev_warn(&port->dd->pdev->dev, | ||
456 | "PxCMD.CR not clear, escalating reset\n"); | ||
457 | |||
458 | if (hba_reset_nosleep(port->dd)) | ||
459 | dev_err(&port->dd->pdev->dev, | ||
460 | "HBA reset escalation failed.\n"); | ||
461 | |||
462 | /* 30 ms delay before com reset to quiesce chip */ | ||
463 | mdelay(30); | ||
464 | } | ||
465 | |||
466 | dev_warn(&port->dd->pdev->dev, "Issuing COM reset\n"); | ||
467 | |||
468 | /* Set PxSCTL.DET */ | ||
469 | writel(readl(port->mmio + PORT_SCR_CTL) | | ||
470 | 1, port->mmio + PORT_SCR_CTL); | ||
471 | readl(port->mmio + PORT_SCR_CTL); | ||
472 | |||
473 | /* Wait 1 ms to quiesce chip function */ | ||
474 | timeout = jiffies + msecs_to_jiffies(1); | ||
475 | while (time_before(jiffies, timeout)) | ||
476 | ; | ||
477 | |||
478 | /* Clear PxSCTL.DET */ | ||
479 | writel(readl(port->mmio + PORT_SCR_CTL) & ~1, | ||
480 | port->mmio + PORT_SCR_CTL); | ||
481 | readl(port->mmio + PORT_SCR_CTL); | ||
482 | |||
483 | /* Wait 500 ms for bit 0 of PORT_SCR_STS to be set */ | ||
484 | timeout = jiffies + msecs_to_jiffies(500); | ||
485 | while (((readl(port->mmio + PORT_SCR_STAT) & 0x01) == 0) | ||
486 | && time_before(jiffies, timeout)) | ||
487 | ; | ||
488 | |||
489 | if ((readl(port->mmio + PORT_SCR_STAT) & 0x01) == 0) | ||
490 | dev_warn(&port->dd->pdev->dev, | ||
491 | "COM reset failed\n"); | ||
492 | |||
493 | /* Clear SError, the PxSERR.DIAG.x should be set so clear it */ | ||
494 | writel(readl(port->mmio + PORT_SCR_ERR), port->mmio + PORT_SCR_ERR); | ||
495 | |||
496 | /* Enable the DMA engine */ | ||
497 | mtip_enable_engine(port, 1); | ||
498 | } | ||
499 | |||
500 | /* | ||
501 | * Called periodically to see if any read/write commands are | ||
502 | * taking too long to complete. | ||
503 | * | ||
504 | * @data Pointer to the PORT data structure. | ||
505 | * | ||
506 | * return value | ||
507 | * None | ||
508 | */ | ||
509 | static void mtip_timeout_function(unsigned long int data) | ||
510 | { | ||
511 | struct mtip_port *port = (struct mtip_port *) data; | ||
512 | struct host_to_dev_fis *fis; | ||
513 | struct mtip_cmd *command; | ||
514 | int tag, cmdto_cnt = 0; | ||
515 | unsigned int bit, group; | ||
516 | unsigned int num_command_slots; | ||
517 | |||
518 | if (unlikely(!port)) | ||
519 | return; | ||
520 | |||
| /* Dereference port only after the NULL check above. */ | ||
| num_command_slots = port->dd->slot_groups * 32; | ||
| |||
521 | if (atomic_read(&port->dd->resumeflag) == true) { | ||
522 | mod_timer(&port->cmd_timer, | ||
523 | jiffies + msecs_to_jiffies(30000)); | ||
524 | return; | ||
525 | } | ||
526 | |||
527 | for (tag = 0; tag < num_command_slots; tag++) { | ||
528 | /* | ||
529 | * Skip internal command slot as it has | ||
530 | * its own timeout mechanism | ||
531 | */ | ||
532 | if (tag == MTIP_TAG_INTERNAL) | ||
533 | continue; | ||
534 | |||
535 | if (atomic_read(&port->commands[tag].active) && | ||
536 | (time_after(jiffies, port->commands[tag].comp_time))) { | ||
537 | group = tag >> 5; | ||
538 | bit = tag & 0x1F; | ||
539 | |||
540 | command = &port->commands[tag]; | ||
541 | fis = (struct host_to_dev_fis *) command->command; | ||
542 | |||
543 | dev_warn(&port->dd->pdev->dev, | ||
544 | "Timeout for command tag %d\n", tag); | ||
545 | |||
546 | cmdto_cnt++; | ||
547 | if (cmdto_cnt == 1) | ||
548 | set_bit(MTIP_FLAG_EH_ACTIVE_BIT, &port->flags); | ||
549 | |||
550 | /* | ||
551 | * Clear the completed bit. This should prevent | ||
552 | * any interrupt handlers from trying to retire | ||
553 | * the command. | ||
554 | */ | ||
555 | writel(1 << bit, port->completed[group]); | ||
556 | |||
557 | /* Call the async completion callback. */ | ||
558 | if (likely(command->async_callback)) | ||
559 | command->async_callback(command->async_data, | ||
560 | -EIO); | ||
561 | command->async_callback = NULL; | ||
562 | command->comp_func = NULL; | ||
563 | |||
564 | /* Unmap the DMA scatter list entries */ | ||
565 | dma_unmap_sg(&port->dd->pdev->dev, | ||
566 | command->sg, | ||
567 | command->scatter_ents, | ||
568 | command->direction); | ||
569 | |||
570 | /* | ||
571 | * Clear the allocated bit and active tag for the | ||
572 | * command. | ||
573 | */ | ||
574 | atomic_set(&port->commands[tag].active, 0); | ||
575 | release_slot(port, tag); | ||
576 | |||
577 | up(&port->cmd_slot); | ||
578 | } | ||
579 | } | ||
580 | |||
581 | if (cmdto_cnt) { | ||
582 | dev_warn(&port->dd->pdev->dev, | ||
583 | "%d commands timed out: restarting port", | ||
584 | cmdto_cnt); | ||
585 | mtip_restart_port(port); | ||
586 | clear_bit(MTIP_FLAG_EH_ACTIVE_BIT, &port->flags); | ||
587 | wake_up_interruptible(&port->svc_wait); | ||
588 | } | ||
589 | |||
590 | /* Restart the timer */ | ||
591 | mod_timer(&port->cmd_timer, | ||
592 | jiffies + msecs_to_jiffies(MTIP_TIMEOUT_CHECK_PERIOD)); | ||
593 | } | ||
594 | |||
595 | /* | ||
596 | * IO completion function. | ||
597 | * | ||
598 | * This completion function is called by the driver ISR when a | ||
599 | * command that was issued by the kernel completes. It first calls the | ||
600 | * asynchronous completion function which normally calls back into the block | ||
601 | * layer passing the asynchronous callback data, then unmaps the | ||
602 | * scatter list associated with the completed command, and finally | ||
603 | * clears the allocated bit associated with the completed command. | ||
604 | * | ||
605 | * @port Pointer to the port data structure. | ||
606 | * @tag Tag of the command. | ||
607 | * @data Pointer to driver_data. | ||
608 | * @status Completion status. | ||
609 | * | ||
610 | * return value | ||
611 | * None | ||
612 | */ | ||
613 | static void mtip_async_complete(struct mtip_port *port, | ||
614 | int tag, | ||
615 | void *data, | ||
616 | int status) | ||
617 | { | ||
618 | struct mtip_cmd *command; | ||
619 | struct driver_data *dd = data; | ||
620 | int cb_status = status ? -EIO : 0; | ||
621 | |||
622 | if (unlikely(!dd) || unlikely(!port)) | ||
623 | return; | ||
624 | |||
625 | command = &port->commands[tag]; | ||
626 | |||
627 | if (unlikely(status == PORT_IRQ_TF_ERR)) { | ||
628 | dev_warn(&port->dd->pdev->dev, | ||
629 | "Command tag %d failed due to TFE\n", tag); | ||
630 | } | ||
631 | |||
632 | /* Upper layer callback */ | ||
633 | if (likely(command->async_callback)) | ||
634 | command->async_callback(command->async_data, cb_status); | ||
635 | |||
636 | command->async_callback = NULL; | ||
637 | command->comp_func = NULL; | ||
638 | |||
639 | /* Unmap the DMA scatter list entries */ | ||
640 | dma_unmap_sg(&dd->pdev->dev, | ||
641 | command->sg, | ||
642 | command->scatter_ents, | ||
643 | command->direction); | ||
644 | |||
645 | /* Clear the allocated and active bits for the command */ | ||
646 | atomic_set(&port->commands[tag].active, 0); | ||
647 | release_slot(port, tag); | ||
648 | |||
649 | up(&port->cmd_slot); | ||
650 | } | ||
651 | |||
652 | /* | ||
653 | * Internal command completion callback function. | ||
654 | * | ||
655 | * This function is normally called by the driver ISR when an internal | ||
656 | * command completes. It signals the command's completion by | ||
657 | * calling complete(). | ||
658 | * | ||
659 | * @port Pointer to the port data structure. | ||
660 | * @tag Tag of the command that has completed. | ||
661 | * @data Pointer to a completion structure. | ||
662 | * @status Completion status. | ||
663 | * | ||
664 | * return value | ||
665 | * None | ||
666 | */ | ||
667 | static void mtip_completion(struct mtip_port *port, | ||
668 | int tag, | ||
669 | void *data, | ||
670 | int status) | ||
671 | { | ||
672 | struct mtip_cmd *command = &port->commands[tag]; | ||
673 | struct completion *waiting = data; | ||
674 | if (unlikely(status == PORT_IRQ_TF_ERR)) | ||
675 | dev_warn(&port->dd->pdev->dev, | ||
676 | "Internal command %d completed with TFE\n", tag); | ||
677 | |||
678 | command->async_callback = NULL; | ||
679 | command->comp_func = NULL; | ||
680 | |||
681 | complete(waiting); | ||
682 | } | ||
683 | |||
684 | /* | ||
685 | * Helper function for tag logging | ||
686 | */ | ||
687 | static void print_tags(struct driver_data *dd, | ||
688 | char *msg, | ||
689 | unsigned long *tagbits) | ||
690 | { | ||
691 | unsigned int tag, count = 0; | ||
692 | |||
693 | for (tag = 0; tag < (dd->slot_groups) * 32; tag++) { | ||
694 | if (test_bit(tag, tagbits)) | ||
695 | count++; | ||
696 | } | ||
697 | if (count) | ||
698 | dev_info(&dd->pdev->dev, "%s [%i tags]\n", msg, count); | ||
699 | } | ||
700 | |||
701 | /* | ||
702 | * Handle a taskfile error (TFE). | ||
703 | * | ||
704 | * @dd Pointer to the DRIVER_DATA structure. | ||
705 | * | ||
706 | * return value | ||
707 | * None | ||
708 | */ | ||
709 | static void mtip_handle_tfe(struct driver_data *dd) | ||
710 | { | ||
711 | int group, tag, bit, reissue; | ||
712 | struct mtip_port *port; | ||
713 | struct mtip_cmd *command; | ||
714 | u32 completed; | ||
715 | struct host_to_dev_fis *fis; | ||
716 | unsigned long tagaccum[SLOTBITS_IN_LONGS]; | ||
717 | |||
718 | dev_warn(&dd->pdev->dev, "Taskfile error\n"); | ||
719 | |||
720 | port = dd->port; | ||
721 | |||
722 | /* Stop the timer to prevent command timeouts. */ | ||
723 | del_timer(&port->cmd_timer); | ||
724 | |||
725 | /* Set eh_active */ | ||
726 | set_bit(MTIP_FLAG_EH_ACTIVE_BIT, &port->flags); | ||
727 | |||
728 | /* Loop through all the groups */ | ||
729 | for (group = 0; group < dd->slot_groups; group++) { | ||
730 | completed = readl(port->completed[group]); | ||
731 | |||
732 | /* clear completed status register in the hardware.*/ | ||
733 | writel(completed, port->completed[group]); | ||
734 | |||
735 | /* clear the tag accumulator */ | ||
736 | memset(tagaccum, 0, SLOTBITS_IN_LONGS * sizeof(long)); | ||
737 | |||
738 | /* Process successfully completed commands */ | ||
739 | for (bit = 0; bit < 32 && completed; bit++) { | ||
740 | if (!(completed & (1<<bit))) | ||
741 | continue; | ||
742 | tag = (group << 5) + bit; | ||
743 | |||
744 | /* Skip the internal command slot */ | ||
745 | if (tag == MTIP_TAG_INTERNAL) | ||
746 | continue; | ||
747 | |||
748 | command = &port->commands[tag]; | ||
749 | if (likely(command->comp_func)) { | ||
750 | set_bit(tag, tagaccum); | ||
751 | atomic_set(&port->commands[tag].active, 0); | ||
752 | command->comp_func(port, | ||
753 | tag, | ||
754 | command->comp_data, | ||
755 | 0); | ||
756 | } else { | ||
757 | dev_err(&port->dd->pdev->dev, | ||
758 | "Missing completion func for tag %d", | ||
759 | tag); | ||
760 | if (mtip_check_surprise_removal(dd->pdev)) { | ||
761 | mtip_command_cleanup(dd); | ||
762 | /* don't proceed further */ | ||
763 | return; | ||
764 | } | ||
765 | } | ||
766 | } | ||
767 | } | ||
768 | print_tags(dd, "TFE tags completed:", tagaccum); | ||
769 | |||
770 | /* Restart the port */ | ||
771 | mdelay(20); | ||
772 | mtip_restart_port(port); | ||
773 | |||
774 | /* clear the tag accumulator */ | ||
775 | memset(tagaccum, 0, SLOTBITS_IN_LONGS * sizeof(long)); | ||
776 | |||
777 | /* Loop through all the groups */ | ||
778 | for (group = 0; group < dd->slot_groups; group++) { | ||
779 | for (bit = 0; bit < 32; bit++) { | ||
780 | reissue = 1; | ||
781 | tag = (group << 5) + bit; | ||
782 | |||
783 | /* If the active bit is set, re-issue the command */ | ||
784 | if (atomic_read(&port->commands[tag].active) == 0) | ||
785 | continue; | ||
786 | |||
787 | fis = (struct host_to_dev_fis *) | ||
788 | port->commands[tag].command; | ||
789 | |||
790 | /* Should re-issue? */ | ||
791 | if (tag == MTIP_TAG_INTERNAL || | ||
792 | fis->command == ATA_CMD_SET_FEATURES) | ||
793 | reissue = 0; | ||
794 | |||
795 | /* | ||
796 | * First check if this command has | ||
797 | * exceeded its retries. | ||
798 | */ | ||
799 | if (reissue && | ||
800 | (port->commands[tag].retries-- > 0)) { | ||
801 | |||
802 | set_bit(tag, tagaccum); | ||
803 | |||
804 | /* Update the timeout value. */ | ||
805 | port->commands[tag].comp_time = | ||
806 | jiffies + msecs_to_jiffies( | ||
807 | MTIP_NCQ_COMMAND_TIMEOUT_MS); | ||
808 | /* Re-issue the command. */ | ||
809 | mtip_issue_ncq_command(port, tag); | ||
810 | |||
811 | continue; | ||
812 | } | ||
813 | |||
814 | /* Retire a command that will not be reissued */ | ||
815 | dev_warn(&port->dd->pdev->dev, | ||
816 | "retiring tag %d\n", tag); | ||
817 | atomic_set(&port->commands[tag].active, 0); | ||
818 | |||
819 | if (port->commands[tag].comp_func) | ||
820 | port->commands[tag].comp_func( | ||
821 | port, | ||
822 | tag, | ||
823 | port->commands[tag].comp_data, | ||
824 | PORT_IRQ_TF_ERR); | ||
825 | else | ||
826 | dev_warn(&port->dd->pdev->dev, | ||
827 | "Bad completion for tag %d\n", | ||
828 | tag); | ||
829 | } | ||
830 | } | ||
831 | print_tags(dd, "TFE tags reissued:", tagaccum); | ||
832 | |||
833 | /* clear eh_active */ | ||
834 | clear_bit(MTIP_FLAG_EH_ACTIVE_BIT, &port->flags); | ||
835 | wake_up_interruptible(&port->svc_wait); | ||
836 | |||
837 | mod_timer(&port->cmd_timer, | ||
838 | jiffies + msecs_to_jiffies(MTIP_TIMEOUT_CHECK_PERIOD)); | ||
839 | } | ||
840 | |||
841 | /* | ||
842 | * Handle a set device bits interrupt | ||
843 | */ | ||
844 | static inline void mtip_process_sdbf(struct driver_data *dd) | ||
845 | { | ||
846 | struct mtip_port *port = dd->port; | ||
847 | int group, tag, bit; | ||
848 | u32 completed; | ||
849 | struct mtip_cmd *command; | ||
850 | |||
851 | /* walk all bits in all slot groups */ | ||
852 | for (group = 0; group < dd->slot_groups; group++) { | ||
853 | completed = readl(port->completed[group]); | ||
854 | |||
855 | /* clear completed status register in the hardware.*/ | ||
856 | writel(completed, port->completed[group]); | ||
857 | |||
858 | /* Process completed commands. */ | ||
859 | for (bit = 0; | ||
860 | (bit < 32) && completed; | ||
861 | bit++, completed >>= 1) { | ||
862 | if (completed & 0x01) { | ||
863 | tag = (group << 5) | bit; | ||
864 | |||
865 | /* skip internal command slot. */ | ||
866 | if (unlikely(tag == MTIP_TAG_INTERNAL)) | ||
867 | continue; | ||
868 | |||
869 | command = &port->commands[tag]; | ||
870 | /* make internal callback */ | ||
871 | if (likely(command->comp_func)) { | ||
872 | command->comp_func( | ||
873 | port, | ||
874 | tag, | ||
875 | command->comp_data, | ||
876 | 0); | ||
877 | } else { | ||
878 | dev_warn(&dd->pdev->dev, | ||
879 | "Null completion " | ||
880 | "for tag %d", | ||
881 | tag); | ||
882 | |||
883 | if (mtip_check_surprise_removal( | ||
884 | dd->pdev)) { | ||
885 | mtip_command_cleanup(dd); | ||
886 | return; | ||
887 | } | ||
888 | } | ||
889 | } | ||
890 | } | ||
891 | } | ||
892 | } | ||
893 | |||
894 | /* | ||
895 | * Process legacy PIO and D2H interrupts | ||
896 | */ | ||
897 | static inline void mtip_process_legacy(struct driver_data *dd, u32 port_stat) | ||
898 | { | ||
899 | struct mtip_port *port = dd->port; | ||
900 | struct mtip_cmd *cmd = &port->commands[MTIP_TAG_INTERNAL]; | ||
901 | |||
902 | if (test_bit(MTIP_FLAG_IC_ACTIVE_BIT, &port->flags) && | ||
903 | (cmd != NULL) && !(readl(port->cmd_issue[MTIP_TAG_INTERNAL]) | ||
904 | & (1 << MTIP_TAG_INTERNAL))) { | ||
905 | if (cmd->comp_func) { | ||
906 | cmd->comp_func(port, | ||
907 | MTIP_TAG_INTERNAL, | ||
908 | cmd->comp_data, | ||
909 | 0); | ||
910 | return; | ||
911 | } | ||
912 | } | ||
913 | |||
914 | dev_warn(&dd->pdev->dev, "IRQ status 0x%x ignored.\n", port_stat); | ||
915 | |||
916 | return; | ||
917 | } | ||
918 | |||
919 | /* | ||
920 | * Demux and handle errors | ||
921 | */ | ||
922 | static inline void mtip_process_errors(struct driver_data *dd, u32 port_stat) | ||
923 | { | ||
924 | if (likely(port_stat & (PORT_IRQ_TF_ERR | PORT_IRQ_IF_ERR))) | ||
925 | mtip_handle_tfe(dd); | ||
926 | |||
927 | if (unlikely(port_stat & PORT_IRQ_CONNECT)) { | ||
928 | dev_warn(&dd->pdev->dev, | ||
929 | "Clearing PxSERR.DIAG.x\n"); | ||
930 | writel((1 << 26), dd->port->mmio + PORT_SCR_ERR); | ||
931 | } | ||
932 | |||
933 | if (unlikely(port_stat & PORT_IRQ_PHYRDY)) { | ||
934 | dev_warn(&dd->pdev->dev, | ||
935 | "Clearing PxSERR.DIAG.n\n"); | ||
936 | writel((1 << 16), dd->port->mmio + PORT_SCR_ERR); | ||
937 | } | ||
938 | |||
939 | if (unlikely(port_stat & ~PORT_IRQ_HANDLED)) { | ||
940 | dev_warn(&dd->pdev->dev, | ||
941 | "Port stat errors %x unhandled\n", | ||
942 | (port_stat & ~PORT_IRQ_HANDLED)); | ||
943 | } | ||
944 | } | ||
945 | |||
946 | static inline irqreturn_t mtip_handle_irq(struct driver_data *dd) | ||
947 | { | ||
949 | struct mtip_port *port = dd->port; | ||
950 | u32 hba_stat, port_stat; | ||
951 | int rv = IRQ_NONE; | ||
952 | |||
953 | hba_stat = readl(dd->mmio + HOST_IRQ_STAT); | ||
954 | if (hba_stat) { | ||
955 | rv = IRQ_HANDLED; | ||
956 | |||
957 | /* Acknowledge the interrupt status on the port.*/ | ||
958 | port_stat = readl(port->mmio + PORT_IRQ_STAT); | ||
959 | writel(port_stat, port->mmio + PORT_IRQ_STAT); | ||
960 | |||
961 | /* Demux port status */ | ||
962 | if (likely(port_stat & PORT_IRQ_SDB_FIS)) | ||
963 | mtip_process_sdbf(dd); | ||
964 | |||
965 | if (unlikely(port_stat & PORT_IRQ_ERR)) { | ||
966 | if (unlikely(mtip_check_surprise_removal(dd->pdev))) { | ||
967 | mtip_command_cleanup(dd); | ||
968 | /* don't proceed further */ | ||
969 | return IRQ_HANDLED; | ||
970 | } | ||
971 | |||
972 | mtip_process_errors(dd, port_stat & PORT_IRQ_ERR); | ||
973 | } | ||
974 | |||
975 | if (unlikely(port_stat & PORT_IRQ_LEGACY)) | ||
976 | mtip_process_legacy(dd, port_stat & PORT_IRQ_LEGACY); | ||
977 | } | ||
978 | |||
979 | /* acknowledge interrupt */ | ||
980 | writel(hba_stat, dd->mmio + HOST_IRQ_STAT); | ||
981 | |||
982 | return rv; | ||
983 | } | ||
984 | |||
985 | /* | ||
986 | * Wrapper for mtip_handle_irq | ||
987 | * (ignores return code) | ||
988 | */ | ||
989 | static void mtip_tasklet(unsigned long data) | ||
990 | { | ||
991 | mtip_handle_irq((struct driver_data *) data); | ||
992 | } | ||
993 | |||
994 | /* | ||
995 | * HBA interrupt subroutine. | ||
996 | * | ||
997 | * @irq IRQ number. | ||
998 | * @instance Pointer to the driver data structure. | ||
999 | * | ||
1000 | * return value | ||
1001 | * IRQ_HANDLED An HBA interrupt was pending and handled. | ||
1002 | * IRQ_NONE This interrupt was not for the HBA. | ||
1003 | */ | ||
1004 | static irqreturn_t mtip_irq_handler(int irq, void *instance) | ||
1005 | { | ||
1006 | struct driver_data *dd = instance; | ||
1007 | tasklet_schedule(&dd->tasklet); | ||
1008 | return IRQ_HANDLED; | ||
1009 | } | ||
1010 | |||
1011 | static void mtip_issue_non_ncq_command(struct mtip_port *port, int tag) | ||
1012 | { | ||
1013 | atomic_set(&port->commands[tag].active, 1); | ||
1014 | writel(1 << MTIP_TAG_BIT(tag), | ||
1015 | port->cmd_issue[MTIP_TAG_INDEX(tag)]); | ||
1016 | } | ||
1017 | |||
1018 | /* | ||
1019 | * Wait for port to quiesce | ||
1020 | * | ||
1021 | * @port Pointer to port data structure | ||
1022 | * @timeout Max duration to wait (ms) | ||
1023 | * | ||
1024 | * return value | ||
1025 | * 0 Success | ||
1026 | * -EBUSY Commands still active | ||
1027 | */ | ||
1028 | static int mtip_quiesce_io(struct mtip_port *port, unsigned long timeout) | ||
1029 | { | ||
1030 | unsigned long to; | ||
1031 | unsigned int n; | ||
1032 | unsigned int active = 1; | ||
1033 | |||
1034 | to = jiffies + msecs_to_jiffies(timeout); | ||
1035 | do { | ||
1036 | if (test_bit(MTIP_FLAG_SVC_THD_ACTIVE_BIT, &port->flags) && | ||
1037 | test_bit(MTIP_FLAG_ISSUE_CMDS_BIT, &port->flags)) { | ||
1038 | msleep(20); | ||
1039 | continue; /* svc thd is actively issuing commands */ | ||
1040 | } | ||
1041 | /* | ||
1042 | * Ignore s_active bit 0 of array element 0. | ||
1043 | * This bit will always be set | ||
1044 | */ | ||
1045 | active = readl(port->s_active[0]) & 0xFFFFFFFE; | ||
1046 | for (n = 1; n < port->dd->slot_groups; n++) | ||
1047 | active |= readl(port->s_active[n]); | ||
1048 | |||
1049 | if (!active) | ||
1050 | break; | ||
1051 | |||
1052 | msleep(20); | ||
1053 | } while (time_before(jiffies, to)); | ||
1054 | |||
1055 | return active ? -EBUSY : 0; | ||
1056 | } | ||
1057 | |||
1058 | /* | ||
1059 | * Execute an internal command and wait for the completion. | ||
1060 | * | ||
1061 | * @port Pointer to the port data structure. | ||
1062 | * @fis Pointer to the FIS that describes the command. | ||
1063 | * @fis_len Length in WORDS of the FIS. | ||
1064 | * @buffer DMA accessible for command data. | ||
1065 | * @buf_len Length, in bytes, of the data buffer. | ||
1066 | * @opts Command header options, excluding the FIS length | ||
1067 | * and the number of PRD entries. | ||
1068 | * @atomic GFP_KERNEL if the call may sleep waiting for completion, | ||
| * any other value to busy-poll for completion instead. | ||
| * @timeout Time in ms to wait for the command to complete. | ||
1069 | * | ||
1070 | * return value | ||
1071 | * 0 Command completed successfully. | ||
1072 | * -EFAULT The buffer address is not correctly aligned. | ||
1073 | * -EBUSY Internal command or other IO in progress. | ||
1074 | * -EAGAIN Time out waiting for command to complete. | ||
1075 | */ | ||
1076 | static int mtip_exec_internal_command(struct mtip_port *port, | ||
1077 | void *fis, | ||
1078 | int fis_len, | ||
1079 | dma_addr_t buffer, | ||
1080 | int buf_len, | ||
1081 | u32 opts, | ||
1082 | gfp_t atomic, | ||
1083 | unsigned long timeout) | ||
1084 | { | ||
1085 | struct mtip_cmd_sg *command_sg; | ||
1086 | DECLARE_COMPLETION_ONSTACK(wait); | ||
1087 | int rv = 0; | ||
1088 | struct mtip_cmd *int_cmd = &port->commands[MTIP_TAG_INTERNAL]; | ||
1089 | |||
1090 | /* Make sure the buffer is 8-byte aligned. This is ASIC specific. */ | ||
1091 | if (buffer & 0x00000007) { | ||
1092 | dev_err(&port->dd->pdev->dev, | ||
1093 | "SG buffer is not 8 byte aligned\n"); | ||
1094 | return -EFAULT; | ||
1095 | } | ||
1096 | |||
1097 | /* Only one internal command should be running at a time */ | ||
1098 | if (test_and_set_bit(MTIP_TAG_INTERNAL, port->allocated)) { | ||
1099 | dev_warn(&port->dd->pdev->dev, | ||
1100 | "Internal command already active\n"); | ||
1101 | return -EBUSY; | ||
1102 | } | ||
1103 | set_bit(MTIP_FLAG_IC_ACTIVE_BIT, &port->flags); | ||
1104 | |||
1105 | if (atomic == GFP_KERNEL) { | ||
1106 | /* wait for io to complete if non atomic */ | ||
1107 | if (mtip_quiesce_io(port, 5000) < 0) { | ||
1108 | dev_warn(&port->dd->pdev->dev, | ||
1109 | "Failed to quiesce IO\n"); | ||
1110 | release_slot(port, MTIP_TAG_INTERNAL); | ||
1111 | clear_bit(MTIP_FLAG_IC_ACTIVE_BIT, &port->flags); | ||
1112 | wake_up_interruptible(&port->svc_wait); | ||
1113 | return -EBUSY; | ||
1114 | } | ||
1115 | |||
1116 | /* Set the completion function and data for the command. */ | ||
1117 | int_cmd->comp_data = &wait; | ||
1118 | int_cmd->comp_func = mtip_completion; | ||
1119 | |||
1120 | } else { | ||
1121 | /* Clear completion - we're going to poll */ | ||
1122 | int_cmd->comp_data = NULL; | ||
1123 | int_cmd->comp_func = NULL; | ||
1124 | } | ||
1125 | |||
1126 | /* Copy the command to the command table */ | ||
1127 | memcpy(int_cmd->command, fis, fis_len*4); | ||
1128 | |||
1129 | /* Populate the SG list */ | ||
1130 | int_cmd->command_header->opts = | ||
1131 | __force_bit2int cpu_to_le32(opts | fis_len); | ||
1132 | if (buf_len) { | ||
1133 | command_sg = int_cmd->command + AHCI_CMD_TBL_HDR_SZ; | ||
1134 | |||
1135 | command_sg->info = | ||
1136 | __force_bit2int cpu_to_le32((buf_len-1) & 0x3FFFFF); | ||
1137 | command_sg->dba = | ||
1138 | __force_bit2int cpu_to_le32(buffer & 0xFFFFFFFF); | ||
1139 | command_sg->dba_upper = | ||
1140 | __force_bit2int cpu_to_le32((buffer >> 16) >> 16); | ||
1141 | |||
1142 | int_cmd->command_header->opts |= | ||
1143 | __force_bit2int cpu_to_le32((1 << 16)); | ||
1144 | } | ||
1145 | |||
1146 | /* Populate the command header */ | ||
1147 | int_cmd->command_header->byte_count = 0; | ||
1148 | |||
1149 | /* Issue the command to the hardware */ | ||
1150 | mtip_issue_non_ncq_command(port, MTIP_TAG_INTERNAL); | ||
1151 | |||
1152 | /* Poll if atomic, wait_for_completion otherwise */ | ||
1153 | if (atomic == GFP_KERNEL) { | ||
1154 | /* Wait for the command to complete or timeout. */ | ||
1155 | if (wait_for_completion_timeout( | ||
1156 | &wait, | ||
1157 | msecs_to_jiffies(timeout)) == 0) { | ||
1158 | dev_err(&port->dd->pdev->dev, | ||
1159 | "Internal command did not complete [%d] " | ||
1160 | "within timeout of %lu ms\n", | ||
1161 | atomic, timeout); | ||
1162 | rv = -EAGAIN; | ||
1163 | } | ||
1164 | |||
1165 | if (readl(port->cmd_issue[MTIP_TAG_INTERNAL]) | ||
1166 | & (1 << MTIP_TAG_INTERNAL)) { | ||
1167 | dev_warn(&port->dd->pdev->dev, | ||
1168 | "Retiring internal command but CI is 1.\n"); | ||
1169 | } | ||
1170 | |||
1171 | } else { | ||
1172 | /* Spin for <timeout> checking if command still outstanding */ | ||
1173 | timeout = jiffies + msecs_to_jiffies(timeout); | ||
1174 | |||
1175 | while ((readl( | ||
1176 | port->cmd_issue[MTIP_TAG_INTERNAL]) | ||
1177 | & (1 << MTIP_TAG_INTERNAL)) | ||
1178 | && time_before(jiffies, timeout)) | ||
1179 | ; | ||
1180 | |||
1181 | if (readl(port->cmd_issue[MTIP_TAG_INTERNAL]) | ||
1182 | & (1 << MTIP_TAG_INTERNAL)) { | ||
1183 | dev_err(&port->dd->pdev->dev, | ||
1184 | "Internal command did not complete [%d]\n", | ||
1185 | atomic); | ||
1186 | rv = -EAGAIN; | ||
1187 | } | ||
1188 | } | ||
1189 | |||
1190 | /* Clear the allocated and active bits for the internal command. */ | ||
1191 | atomic_set(&int_cmd->active, 0); | ||
1192 | release_slot(port, MTIP_TAG_INTERNAL); | ||
1193 | clear_bit(MTIP_FLAG_IC_ACTIVE_BIT, &port->flags); | ||
1194 | wake_up_interruptible(&port->svc_wait); | ||
1195 | |||
1196 | return rv; | ||
1197 | } | ||
1198 | |||
1199 | /* | ||
1200 | * Byte-swap ATA ID strings. | ||
1201 | * | ||
1202 | * ATA identify data contains strings in byte-swapped 16-bit words. | ||
1203 | * They must be swapped (on all architectures) to be usable as C strings. | ||
1204 | * This function swaps bytes in-place. | ||
1205 | * | ||
1206 | * @buf The buffer location of the string | ||
1207 | * @len The number of bytes to swap | ||
1208 | * | ||
1209 | * return value | ||
1210 | * None | ||
1211 | */ | ||
1212 | static inline void ata_swap_string(u16 *buf, unsigned int len) | ||
1213 | { | ||
1214 | int i; | ||
1215 | for (i = 0; i < (len/2); i++) | ||
1216 | be16_to_cpus(&buf[i]); | ||
1217 | } | ||
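| |||
| /* | ||
|  * Editorial example: identify data holding the raw bytes "iMrcno" | ||
|  * on a little-endian host reads back as "Micron" once each 16-bit | ||
|  * word has been byte-swapped by the loop above. | ||
|  */ | ||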
1218 | |||
1219 | /* | ||
1220 | * Request the device identity information. | ||
1221 | * | ||
1222 | * If a user space buffer is not specified, i.e. is NULL, the | ||
1223 | * identify information is still read from the drive and placed | ||
1224 | * into the identify data buffer (@e port->identify) in the | ||
1225 | * port data structure. | ||
1226 | * When the identify buffer contains valid identify information @e | ||
1227 | * port->identify_valid is non-zero. | ||
1228 | * | ||
1229 | * @port Pointer to the port structure. | ||
1230 | * @user_buffer A user space buffer where the identify data should be | ||
1231 | * copied. | ||
1232 | * | ||
1233 | * return value | ||
1234 | * 0 Command completed successfully. | ||
1235 | * -EFAULT An error occurred while copying data to the user buffer. | ||
1236 | * -1 Command failed. | ||
1237 | */ | ||
1238 | static int mtip_get_identify(struct mtip_port *port, void __user *user_buffer) | ||
1239 | { | ||
1240 | int rv = 0; | ||
1241 | struct host_to_dev_fis fis; | ||
1242 | |||
1243 | /* Build the FIS. */ | ||
1244 | memset(&fis, 0, sizeof(struct host_to_dev_fis)); | ||
1245 | fis.type = 0x27; | ||
1246 | fis.opts = 1 << 7; | ||
1247 | fis.command = ATA_CMD_ID_ATA; | ||
1248 | |||
1249 | /* Set the identify information as invalid. */ | ||
1250 | port->identify_valid = 0; | ||
1251 | |||
1252 | /* Clear the identify information. */ | ||
1253 | memset(port->identify, 0, sizeof(u16) * ATA_ID_WORDS); | ||
1254 | |||
1255 | /* Execute the command. */ | ||
1256 | if (mtip_exec_internal_command(port, | ||
1257 | &fis, | ||
1258 | 5, | ||
1259 | port->identify_dma, | ||
1260 | sizeof(u16) * ATA_ID_WORDS, | ||
1261 | 0, | ||
1262 | GFP_KERNEL, | ||
1263 | MTIP_INTERNAL_COMMAND_TIMEOUT_MS) | ||
1264 | < 0) { | ||
1265 | rv = -1; | ||
1266 | goto out; | ||
1267 | } | ||
1268 | |||
1269 | /* | ||
1270 | * Perform any necessary byte-swapping. Yes, the kernel does in fact | ||
1271 | * perform field-sensitive swapping on the string fields. | ||
1272 | * See the kernel use of ata_id_string() for proof of this. | ||
1273 | */ | ||
1274 | #ifdef __LITTLE_ENDIAN | ||
1275 | ata_swap_string(port->identify + 27, 40); /* model string*/ | ||
1276 | ata_swap_string(port->identify + 23, 8); /* firmware string*/ | ||
1277 | ata_swap_string(port->identify + 10, 20); /* serial# string*/ | ||
1278 | #else | ||
1279 | { | ||
1280 | int i; | ||
1281 | for (i = 0; i < ATA_ID_WORDS; i++) | ||
1282 | port->identify[i] = le16_to_cpu(port->identify[i]); | ||
1283 | } | ||
1284 | #endif | ||
1285 | |||
1286 | /* Set the identify buffer as valid. */ | ||
1287 | port->identify_valid = 1; | ||
1288 | |||
1289 | if (user_buffer) { | ||
1290 | if (copy_to_user( | ||
1291 | user_buffer, | ||
1292 | port->identify, | ||
1293 | ATA_ID_WORDS * sizeof(u16))) { | ||
1294 | rv = -EFAULT; | ||
1295 | goto out; | ||
1296 | } | ||
1297 | } | ||
1298 | |||
1299 | out: | ||
1300 | return rv; | ||
1301 | } | ||
1302 | |||
1303 | /* | ||
1304 | * Issue a standby immediate command to the device. | ||
1305 | * | ||
1306 | * @port Pointer to the port structure. | ||
1307 | * | ||
1308 | * return value | ||
1309 | * 0 Command was executed successfully. | ||
1310 | * -1 An error occurred while executing the command. | ||
1311 | */ | ||
1312 | static int mtip_standby_immediate(struct mtip_port *port) | ||
1313 | { | ||
1314 | int rv; | ||
1315 | struct host_to_dev_fis fis; | ||
1316 | |||
1317 | /* Build the FIS. */ | ||
1318 | memset(&fis, 0, sizeof(struct host_to_dev_fis)); | ||
1319 | fis.type = 0x27; | ||
1320 | fis.opts = 1 << 7; | ||
1321 | fis.command = ATA_CMD_STANDBYNOW1; | ||
1322 | |||
1323 | /* Execute the command. Use a 15-second timeout for large drives. */ | ||
1324 | rv = mtip_exec_internal_command(port, | ||
1325 | &fis, | ||
1326 | 5, | ||
1327 | 0, | ||
1328 | 0, | ||
1329 | 0, | ||
1330 | GFP_KERNEL, | ||
1331 | 15000); | ||
1332 | |||
1333 | return rv; | ||
1334 | } | ||
1335 | |||
1336 | /* | ||
1337 | * Get the drive capacity. | ||
1338 | * | ||
1339 | * @dd Pointer to the device data structure. | ||
1340 | * @sectors Pointer to the variable that will receive the sector count. | ||
1341 | * | ||
1342 | * return value | ||
1343 | * 1 Capacity was returned successfully. | ||
1344 | * 0 The identify information is invalid. | ||
1345 | */ | ||
1346 | static bool mtip_hw_get_capacity(struct driver_data *dd, sector_t *sectors) | ||
1347 | { | ||
1348 | struct mtip_port *port = dd->port; | ||
1349 | u64 total, raw0, raw1, raw2, raw3; | ||
1350 | raw0 = port->identify[100]; | ||
1351 | raw1 = port->identify[101]; | ||
1352 | raw2 = port->identify[102]; | ||
1353 | raw3 = port->identify[103]; | ||
1354 | total = raw0 | raw1<<16 | raw2<<32 | raw3<<48; | ||
1355 | *sectors = total; | ||
1356 | return (bool) !!port->identify_valid; | ||
1357 | } | ||
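| |||
| /* | ||
|  * Editorial example: identify words 100..103 = { 0x0000, 0x0C00, | ||
|  * 0x0000, 0x0000 } yield total = 0x0C00 << 16 = 201326592 sectors, | ||
|  * exactly 96 GiB at 512 bytes per sector. | ||
|  */ | ||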
1358 | |||
1359 | /* | ||
1360 | * Reset the HBA. | ||
1361 | * | ||
1362 | * Resets the HBA by setting the HBA Reset bit in the Global | ||
1363 | * HBA Control register. After setting the HBA Reset bit the | ||
1364 | * function waits for 1 second before reading the HBA Reset | ||
1365 | * bit to make sure it has cleared. If the HBA Reset bit has not | ||
1366 | * cleared, an error is returned. This function cannot be used in | ||
1367 | * an atomic (non-blocking) context. | ||
1368 | * | ||
1369 | * @dd Pointer to the driver data structure. | ||
1370 | * | ||
1371 | * return value | ||
1372 | * 0 The reset was successful. | ||
1373 | * -1 The HBA Reset bit did not clear. | ||
1374 | */ | ||
1375 | static int mtip_hba_reset(struct driver_data *dd) | ||
1376 | { | ||
1377 | mtip_deinit_port(dd->port); | ||
1378 | |||
1379 | /* Set the reset bit */ | ||
1380 | writel(HOST_RESET, dd->mmio + HOST_CTL); | ||
1381 | |||
1382 | /* Flush */ | ||
1383 | readl(dd->mmio + HOST_CTL); | ||
1384 | |||
1385 | /* Wait for reset to clear */ | ||
1386 | ssleep(1); | ||
1387 | |||
1388 | /* Check the bit has cleared */ | ||
1389 | if (readl(dd->mmio + HOST_CTL) & HOST_RESET) { | ||
1390 | dev_err(&dd->pdev->dev, | ||
1391 | "Reset bit did not clear.\n"); | ||
1392 | return -1; | ||
1393 | } | ||
1394 | |||
1395 | return 0; | ||
1396 | } | ||
1397 | |||
1398 | /* | ||
1399 | * Display the identify command data. | ||
1400 | * | ||
1401 | * @port Pointer to the port data structure. | ||
1402 | * | ||
1403 | * return value | ||
1404 | * None | ||
1405 | */ | ||
1406 | static void mtip_dump_identify(struct mtip_port *port) | ||
1407 | { | ||
1408 | sector_t sectors; | ||
1409 | unsigned short revid; | ||
1410 | char cbuf[42]; | ||
1411 | |||
1412 | if (!port->identify_valid) | ||
1413 | return; | ||
1414 | |||
1415 | strlcpy(cbuf, (char *)(port->identify+10), 21); | ||
1416 | dev_info(&port->dd->pdev->dev, | ||
1417 | "Serial No.: %s\n", cbuf); | ||
1418 | |||
1419 | strlcpy(cbuf, (char *)(port->identify+23), 9); | ||
1420 | dev_info(&port->dd->pdev->dev, | ||
1421 | "Firmware Ver.: %s\n", cbuf); | ||
1422 | |||
1423 | strlcpy(cbuf, (char *)(port->identify+27), 41); | ||
1424 | dev_info(&port->dd->pdev->dev, "Model: %s\n", cbuf); | ||
1425 | |||
1426 | if (mtip_hw_get_capacity(port->dd, §ors)) | ||
1427 | dev_info(&port->dd->pdev->dev, | ||
1428 | "Capacity: %llu sectors (%llu MB)\n", | ||
1429 | (u64)sectors, | ||
1430 | ((u64)sectors) * ATA_SECT_SIZE >> 20); | ||
1431 | |||
1432 | pci_read_config_word(port->dd->pdev, PCI_REVISION_ID, &revid); | ||
1433 | switch (revid & 0xFF) { | ||
1434 | case 0x1: | ||
1435 | strlcpy(cbuf, "A0", 3); | ||
1436 | break; | ||
1437 | case 0x3: | ||
1438 | strlcpy(cbuf, "A2", 3); | ||
1439 | break; | ||
1440 | default: | ||
1441 | strlcpy(cbuf, "?", 2); | ||
1442 | break; | ||
1443 | } | ||
1444 | dev_info(&port->dd->pdev->dev, | ||
1445 | "Card Type: %s\n", cbuf); | ||
1446 | } | ||
1447 | |||
1448 | /* | ||
1449 | * Map the command's scatter list into the command table. | ||
1450 | * | ||
1451 | * @command Pointer to the command. | ||
1452 | * @nents Number of scatter list entries. | ||
1453 | * | ||
1454 | * return value | ||
1455 | * None | ||
1456 | */ | ||
1457 | static inline void fill_command_sg(struct driver_data *dd, | ||
1458 | struct mtip_cmd *command, | ||
1459 | int nents) | ||
1460 | { | ||
1461 | int n; | ||
1462 | unsigned int dma_len; | ||
1463 | struct mtip_cmd_sg *command_sg; | ||
1464 | struct scatterlist *sg = command->sg; | ||
1465 | |||
1466 | command_sg = command->command + AHCI_CMD_TBL_HDR_SZ; | ||
1467 | |||
1468 | for (n = 0; n < nents; n++) { | ||
1469 | dma_len = sg_dma_len(sg); | ||
1470 | if (dma_len > 0x400000) | ||
1471 | dev_err(&dd->pdev->dev, | ||
1472 | "DMA segment length truncated\n"); | ||
1473 | command_sg->info = __force_bit2int | ||
1474 | cpu_to_le32((dma_len-1) & 0x3FFFFF); | ||
1475 | command_sg->dba = __force_bit2int | ||
1476 | cpu_to_le32(sg_dma_address(sg)); | ||
1477 | command_sg->dba_upper = __force_bit2int | ||
1478 | cpu_to_le32((sg_dma_address(sg) >> 16) >> 16); | ||
1479 | command_sg++; | ||
1480 | sg++; | ||
1481 | } | ||
1482 | } | ||
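| |||
| /* | ||
|  * Editorial example: a 4096-byte segment is encoded above as | ||
|  * (4096 - 1) & 0x3FFFFF = 0x0FFF in the low 22 bits of 'info', so | ||
|  * the largest expressible segment is 0x400000 bytes (4 MB), which | ||
|  * is why longer segments trigger the truncation warning. | ||
|  */ | ||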
1483 | |||
1484 | /* | ||
1485 | * @brief Execute a drive command. | ||
1486 | * | ||
1487 | * return value 0 The command completed successfully. | ||
1488 | * return value -1 An error occurred while executing the command. | ||
1489 | */ | ||
1490 | static int exec_drive_task(struct mtip_port *port, u8 *command) | ||
1491 | { | ||
1492 | struct host_to_dev_fis fis; | ||
1493 | struct host_to_dev_fis *reply = (port->rxfis + RX_FIS_D2H_REG); | ||
1494 | |||
1495 | /* Build the FIS. */ | ||
1496 | memset(&fis, 0, sizeof(struct host_to_dev_fis)); | ||
1497 | fis.type = 0x27; | ||
1498 | fis.opts = 1 << 7; | ||
1499 | fis.command = command[0]; | ||
1500 | fis.features = command[1]; | ||
1501 | fis.sect_count = command[2]; | ||
1502 | fis.sector = command[3]; | ||
1503 | fis.cyl_low = command[4]; | ||
1504 | fis.cyl_hi = command[5]; | ||
1505 | fis.device = command[6] & ~0x10; /* Clear the dev bit*/ | ||
1506 | |||
1508 | dbg_printk(MTIP_DRV_NAME "%s: User Command: cmd %x, feat %x, " | ||
1509 | "nsect %x, sect %x, lcyl %x, " | ||
1510 | "hcyl %x, sel %x\n", | ||
1511 | __func__, | ||
1512 | command[0], | ||
1513 | command[1], | ||
1514 | command[2], | ||
1515 | command[3], | ||
1516 | command[4], | ||
1517 | command[5], | ||
1518 | command[6]); | ||
1519 | |||
1520 | /* Execute the command. */ | ||
1521 | if (mtip_exec_internal_command(port, | ||
1522 | &fis, | ||
1523 | 5, | ||
1524 | 0, | ||
1525 | 0, | ||
1526 | 0, | ||
1527 | GFP_KERNEL, | ||
1528 | MTIP_IOCTL_COMMAND_TIMEOUT_MS) < 0) { | ||
1529 | return -1; | ||
1530 | } | ||
1531 | |||
1532 | command[0] = reply->command; /* Status*/ | ||
1533 | command[1] = reply->features; /* Error*/ | ||
1534 | command[4] = reply->cyl_low; | ||
1535 | command[5] = reply->cyl_hi; | ||
1536 | |||
1537 | dbg_printk(MTIP_DRV_NAME "%s: Completion Status: stat %x, " | ||
1538 | "err %x , cyl_lo %x cyl_hi %x\n", | ||
1539 | __func__, | ||
1540 | command[0], | ||
1541 | command[1], | ||
1542 | command[4], | ||
1543 | command[5]); | ||
1544 | |||
1545 | return 0; | ||
1546 | } | ||
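| |||
| /* | ||
|  * Editorial sketch (not driver code; the helper name is invented): | ||
|  * issuing STANDBY IMMEDIATE through exec_drive_task() above. The | ||
|  * array layout mirrors the command[0..6] unpacking in that function. | ||
|  */ | ||
| #if 0 | ||
| static int example_standby(struct mtip_port *port) | ||
| { | ||
| 	u8 cmd[7] = { ATA_CMD_STANDBYNOW1, 0, 0, 0, 0, 0, 0 }; | ||
| |||
| 	return exec_drive_task(port, cmd); /* 0 on success, -1 on error */ | ||
| } | ||
| #endif | ||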
1547 | |||
1548 | /* | ||
1549 | * @brief Execute a drive command. | ||
1550 | * | ||
1551 | * @param port Pointer to the port data structure. | ||
1552 | * @param command Pointer to the user specified command parameters. | ||
1553 | * @param user_buffer Pointer to the user space buffer where read sector | ||
1554 | * data should be copied. | ||
1555 | * | ||
1556 | * return value 0 The command completed successfully. | ||
1557 | * return value -EFAULT An error occurred while copying the completion | ||
1558 | * data to the user space buffer. | ||
1559 | * return value -1 An error occurred while executing the command. | ||
1560 | */ | ||
1561 | static int exec_drive_command(struct mtip_port *port, u8 *command, | ||
1562 | void __user *user_buffer) | ||
1563 | { | ||
1564 | struct host_to_dev_fis fis; | ||
1565 | struct host_to_dev_fis *reply = (port->rxfis + RX_FIS_D2H_REG); | ||
1566 | |||
1567 | /* Build the FIS. */ | ||
1568 | memset(&fis, 0, sizeof(struct host_to_dev_fis)); | ||
1569 | fis.type = 0x27; | ||
1570 | fis.opts = 1 << 7; | ||
1571 | fis.command = command[0]; | ||
1572 | fis.features = command[2]; | ||
1573 | fis.sect_count = command[3]; | ||
1574 | if (fis.command == ATA_CMD_SMART) { | ||
1575 | fis.sector = command[1]; | ||
1576 | fis.cyl_low = 0x4F; | ||
1577 | fis.cyl_hi = 0xC2; | ||
1578 | } | ||
1579 | |||
1580 | dbg_printk(MTIP_DRV_NAME | ||
1581 | "%s: User Command: cmd %x, sect %x, " | ||
1582 | "feat %x, sectcnt %x\n", | ||
1583 | __func__, | ||
1584 | command[0], | ||
1585 | command[1], | ||
1586 | command[2], | ||
1587 | command[3]); | ||
1588 | |||
1589 | memset(port->sector_buffer, 0x00, ATA_SECT_SIZE); | ||
1590 | |||
1591 | /* Execute the command. */ | ||
1592 | if (mtip_exec_internal_command(port, | ||
1593 | &fis, | ||
1594 | 5, | ||
1595 | port->sector_buffer_dma, | ||
1596 | (command[3] != 0) ? ATA_SECT_SIZE : 0, | ||
1597 | 0, | ||
1598 | GFP_KERNEL, | ||
1599 | MTIP_IOCTL_COMMAND_TIMEOUT_MS) | ||
1600 | < 0) { | ||
1601 | return -1; | ||
1602 | } | ||
1603 | |||
1604 | /* Collect the completion status. */ | ||
1605 | command[0] = reply->command; /* Status*/ | ||
1606 | command[1] = reply->features; /* Error*/ | ||
1607 | command[2] = command[3]; | ||
1608 | |||
1609 | dbg_printk(MTIP_DRV_NAME | ||
1610 | "%s: Completion Status: stat %x, " | ||
1611 | "err %x, cmd %x\n", | ||
1612 | __func__, | ||
1613 | command[0], | ||
1614 | command[1], | ||
1615 | command[2]); | ||
1616 | |||
1617 | if (user_buffer && command[3]) { | ||
1618 | if (copy_to_user(user_buffer, | ||
1619 | port->sector_buffer, | ||
1620 | ATA_SECT_SIZE * command[3])) { | ||
1621 | return -EFAULT; | ||
1622 | } | ||
1623 | } | ||
1624 | |||
1625 | return 0; | ||
1626 | } | ||
1627 | |||
1628 | /* | ||
1629 | * Indicates whether a command has a single sector payload. | ||
1630 | * | ||
1631 | * @command The command opcode sent to the device. | ||
1632 | * @features The features value sent with the command. | ||
1633 | * | ||
1634 | * return value | ||
1635 | * 1 The command always has a single-sector payload, | ||
1636 | * regardless of the value in the Sector Count field. | ||
1637 | * 0 otherwise | ||
1638 | */ | ||
1640 | static unsigned int implicit_sector(unsigned char command, | ||
1641 | unsigned char features) | ||
1642 | { | ||
1643 | unsigned int rv = 0; | ||
1644 | |||
1645 | /* list of commands that have an implicit sector count of 1 */ | ||
1646 | switch (command) { | ||
1647 | case ATA_CMD_SEC_SET_PASS: | ||
1648 | case ATA_CMD_SEC_UNLOCK: | ||
1649 | case ATA_CMD_SEC_ERASE_PREP: | ||
1650 | case ATA_CMD_SEC_ERASE_UNIT: | ||
1651 | case ATA_CMD_SEC_FREEZE_LOCK: | ||
1652 | case ATA_CMD_SEC_DISABLE_PASS: | ||
1653 | case ATA_CMD_PMP_READ: | ||
1654 | case ATA_CMD_PMP_WRITE: | ||
1655 | rv = 1; | ||
1656 | break; | ||
1657 | case ATA_CMD_SET_MAX: | ||
1658 | if (features == ATA_SET_MAX_UNLOCK) | ||
1659 | rv = 1; | ||
1660 | break; | ||
1661 | case ATA_CMD_SMART: | ||
1662 | if ((features == ATA_SMART_READ_VALUES) || | ||
1663 | (features == ATA_SMART_READ_THRESHOLDS)) | ||
1664 | rv = 1; | ||
1665 | break; | ||
1666 | case ATA_CMD_CONF_OVERLAY: | ||
1667 | if ((features == ATA_DCO_IDENTIFY) || | ||
1668 | (features == ATA_DCO_SET)) | ||
1669 | rv = 1; | ||
1670 | break; | ||
1671 | } | ||
1672 | return rv; | ||
1673 | } | ||
1674 | |||
1675 | /* | ||
1676 | * Executes a taskfile | ||
1677 | * See ide_taskfile_ioctl() for derivation | ||
1678 | */ | ||
1679 | static int exec_drive_taskfile(struct driver_data *dd, | ||
1680 | void __user *buf, | ||
1681 | ide_task_request_t *req_task, | ||
1682 | int outtotal) | ||
1683 | { | ||
1684 | struct host_to_dev_fis fis; | ||
1685 | struct host_to_dev_fis *reply; | ||
1686 | u8 *outbuf = NULL; | ||
1687 | u8 *inbuf = NULL; | ||
1688 | dma_addr_t outbuf_dma = 0; | ||
1689 | dma_addr_t inbuf_dma = 0; | ||
1690 | dma_addr_t dma_buffer = 0; | ||
1691 | int err = 0; | ||
1692 | unsigned int taskin = 0; | ||
1693 | unsigned int taskout = 0; | ||
1694 | u8 nsect = 0; | ||
1695 | unsigned int timeout = MTIP_IOCTL_COMMAND_TIMEOUT_MS; | ||
1696 | unsigned int force_single_sector; | ||
1697 | unsigned int transfer_size; | ||
1698 | unsigned long task_file_data; | ||
1699 | int intotal = outtotal + req_task->out_size; | ||
1700 | |||
1701 | taskout = req_task->out_size; | ||
1702 | taskin = req_task->in_size; | ||
1703 | /* 130560 = 512 * 0xFF*/ | ||
1704 | if (taskin > 130560 || taskout > 130560) { | ||
1705 | err = -EINVAL; | ||
1706 | goto abort; | ||
1707 | } | ||
1708 | |||
1709 | if (taskout) { | ||
1710 | outbuf = kzalloc(taskout, GFP_KERNEL); | ||
1711 | if (outbuf == NULL) { | ||
1712 | err = -ENOMEM; | ||
1713 | goto abort; | ||
1714 | } | ||
1715 | if (copy_from_user(outbuf, buf + outtotal, taskout)) { | ||
1716 | err = -EFAULT; | ||
1717 | goto abort; | ||
1718 | } | ||
1719 | outbuf_dma = pci_map_single(dd->pdev, | ||
1720 | outbuf, | ||
1721 | taskout, | ||
1722 | DMA_TO_DEVICE); | ||
1723 | if (outbuf_dma == 0) { | ||
1724 | err = -ENOMEM; | ||
1725 | goto abort; | ||
1726 | } | ||
1727 | dma_buffer = outbuf_dma; | ||
1728 | } | ||
1729 | |||
1730 | if (taskin) { | ||
1731 | inbuf = kzalloc(taskin, GFP_KERNEL); | ||
1732 | if (inbuf == NULL) { | ||
1733 | err = -ENOMEM; | ||
1734 | goto abort; | ||
1735 | } | ||
1736 | |||
1737 | if (copy_from_user(inbuf, buf + intotal, taskin)) { | ||
1738 | err = -EFAULT; | ||
1739 | goto abort; | ||
1740 | } | ||
1741 | inbuf_dma = pci_map_single(dd->pdev, | ||
1742 | inbuf, | ||
1743 | taskin, DMA_FROM_DEVICE); | ||
1744 | if (inbuf_dma == 0) { | ||
1745 | err = -ENOMEM; | ||
1746 | goto abort; | ||
1747 | } | ||
1748 | dma_buffer = inbuf_dma; | ||
1749 | } | ||
1750 | |||
1751 | /* Only PIO and non-data commands are supported by this ioctl. */ | ||
1752 | switch (req_task->data_phase) { | ||
1753 | case TASKFILE_OUT: | ||
1754 | nsect = taskout / ATA_SECT_SIZE; | ||
1755 | reply = (dd->port->rxfis + RX_FIS_PIO_SETUP); | ||
1756 | break; | ||
1757 | case TASKFILE_IN: | ||
1758 | reply = (dd->port->rxfis + RX_FIS_PIO_SETUP); | ||
1759 | break; | ||
1760 | case TASKFILE_NO_DATA: | ||
1761 | reply = (dd->port->rxfis + RX_FIS_D2H_REG); | ||
1762 | break; | ||
1763 | default: | ||
1764 | err = -EINVAL; | ||
1765 | goto abort; | ||
1766 | } | ||
1767 | |||
1768 | /* Build the FIS. */ | ||
1769 | memset(&fis, 0, sizeof(struct host_to_dev_fis)); | ||
1770 | |||
1771 | fis.type = 0x27; | ||
1772 | fis.opts = 1 << 7; | ||
1773 | fis.command = req_task->io_ports[7]; | ||
1774 | fis.features = req_task->io_ports[1]; | ||
1775 | fis.sect_count = req_task->io_ports[2]; | ||
1776 | fis.lba_low = req_task->io_ports[3]; | ||
1777 | fis.lba_mid = req_task->io_ports[4]; | ||
1778 | fis.lba_hi = req_task->io_ports[5]; | ||
1779 | /* Clear the dev bit*/ | ||
1780 | fis.device = req_task->io_ports[6] & ~0x10; | ||
1781 | |||
1782 | if ((req_task->in_flags.all == 0) && (req_task->out_flags.all & 1)) { | ||
1783 | req_task->in_flags.all = | ||
1784 | IDE_TASKFILE_STD_IN_FLAGS | | ||
1785 | (IDE_HOB_STD_IN_FLAGS << 8); | ||
1786 | fis.lba_low_ex = req_task->hob_ports[3]; | ||
1787 | fis.lba_mid_ex = req_task->hob_ports[4]; | ||
1788 | fis.lba_hi_ex = req_task->hob_ports[5]; | ||
1789 | fis.features_ex = req_task->hob_ports[1]; | ||
1790 | fis.sect_cnt_ex = req_task->hob_ports[2]; | ||
1791 | |||
1792 | } else { | ||
1793 | req_task->in_flags.all = IDE_TASKFILE_STD_IN_FLAGS; | ||
1794 | } | ||
1795 | |||
1796 | force_single_sector = implicit_sector(fis.command, fis.features); | ||
1797 | |||
1798 | if ((taskin || taskout) && (!fis.sect_count)) { | ||
1799 | if (nsect) | ||
1800 | fis.sect_count = nsect; | ||
1801 | else { | ||
1802 | if (!force_single_sector) { | ||
1803 | dev_warn(&dd->pdev->dev, | ||
1804 | "data movement but " | ||
1805 | "sect_count is 0\n"); | ||
1806 | err = -EINVAL; | ||
1807 | goto abort; | ||
1808 | } | ||
1809 | } | ||
1810 | } | ||
1811 | |||
1812 | dbg_printk(MTIP_DRV_NAME | ||
1813 | "taskfile: cmd %x, feat %x, nsect %x," | ||
1814 | " sect/lbal %x, lcyl/lbam %x, hcyl/lbah %x," | ||
1815 | " head/dev %x\n", | ||
1816 | fis.command, | ||
1817 | fis.features, | ||
1818 | fis.sect_count, | ||
1819 | fis.lba_low, | ||
1820 | fis.lba_mid, | ||
1821 | fis.lba_hi, | ||
1822 | fis.device); | ||
1823 | |||
1824 | switch (fis.command) { | ||
1825 | case ATA_CMD_DOWNLOAD_MICRO: | ||
1826 | /* Change timeout for Download Microcode to 60 seconds.*/ | ||
1827 | timeout = 60000; | ||
1828 | break; | ||
1829 | case ATA_CMD_SEC_ERASE_UNIT: | ||
1830 | /* Change timeout for Security Erase Unit to 4 minutes.*/ | ||
1831 | timeout = 240000; | ||
1832 | break; | ||
1833 | case ATA_CMD_STANDBYNOW1: | ||
1834 | /* Change timeout for standby immediate to 10 seconds.*/ | ||
1835 | timeout = 10000; | ||
1836 | break; | ||
1837 | case 0xF7: | ||
1838 | case 0xFA: | ||
1839 | /* Change timeout for vendor unique command to 10 secs */ | ||
1840 | timeout = 10000; | ||
1841 | break; | ||
1842 | case ATA_CMD_SMART: | ||
1843 | /* Change timeout for vendor unique command to 10 secs */ | ||
1844 | timeout = 10000; | ||
1845 | break; | ||
1846 | default: | ||
1847 | timeout = MTIP_IOCTL_COMMAND_TIMEOUT_MS; | ||
1848 | break; | ||
1849 | } | ||
1850 | |||
1851 | /* Determine the correct transfer size.*/ | ||
1852 | if (force_single_sector) | ||
1853 | transfer_size = ATA_SECT_SIZE; | ||
1854 | else | ||
1855 | transfer_size = ATA_SECT_SIZE * fis.sect_count; | ||
1856 | |||
1857 | /* Execute the command.*/ | ||
1858 | if (mtip_exec_internal_command(dd->port, | ||
1859 | &fis, | ||
1860 | 5, | ||
1861 | dma_buffer, | ||
1862 | transfer_size, | ||
1863 | 0, | ||
1864 | GFP_KERNEL, | ||
1865 | timeout) < 0) { | ||
1866 | err = -EIO; | ||
1867 | goto abort; | ||
1868 | } | ||
1869 | |||
1870 | task_file_data = readl(dd->port->mmio+PORT_TFDATA); | ||
1871 | |||
1872 | if ((req_task->data_phase == TASKFILE_IN) && !(task_file_data & 1)) { | ||
1873 | reply = dd->port->rxfis + RX_FIS_PIO_SETUP; | ||
1874 | req_task->io_ports[7] = reply->control; | ||
1875 | } else { | ||
1876 | reply = dd->port->rxfis + RX_FIS_D2H_REG; | ||
1877 | req_task->io_ports[7] = reply->command; | ||
1878 | } | ||
1879 | |||
1880 | /* reclaim the DMA buffers.*/ | ||
1881 | if (inbuf_dma) | ||
1882 | pci_unmap_single(dd->pdev, inbuf_dma, | ||
1883 | taskin, DMA_FROM_DEVICE); | ||
1884 | if (outbuf_dma) | ||
1885 | pci_unmap_single(dd->pdev, outbuf_dma, | ||
1886 | taskout, DMA_TO_DEVICE); | ||
1887 | inbuf_dma = 0; | ||
1888 | outbuf_dma = 0; | ||
1889 | |||
1890 | /* return the ATA registers to the caller.*/ | ||
1891 | req_task->io_ports[1] = reply->features; | ||
1892 | req_task->io_ports[2] = reply->sect_count; | ||
1893 | req_task->io_ports[3] = reply->lba_low; | ||
1894 | req_task->io_ports[4] = reply->lba_mid; | ||
1895 | req_task->io_ports[5] = reply->lba_hi; | ||
1896 | req_task->io_ports[6] = reply->device; | ||
1897 | |||
1898 | if (req_task->out_flags.all & 1) { | ||
1899 | |||
1900 | req_task->hob_ports[3] = reply->lba_low_ex; | ||
1901 | req_task->hob_ports[4] = reply->lba_mid_ex; | ||
1902 | req_task->hob_ports[5] = reply->lba_hi_ex; | ||
1903 | req_task->hob_ports[1] = reply->features_ex; | ||
1904 | req_task->hob_ports[2] = reply->sect_cnt_ex; | ||
1905 | } | ||
1906 | |||
1907 | /* COMRESET after secure erase or low-level format */ | ||
1908 | if (((fis.command == ATA_CMD_SEC_ERASE_UNIT) || | ||
1909 | ((fis.command == 0xFC) && | ||
1910 | (fis.features == 0x27 || fis.features == 0x72 || | ||
1911 | fis.features == 0x62 || fis.features == 0x26))) && | ||
1912 | !(reply->command & 1)) { | ||
1913 | mtip_restart_port(dd->port); | ||
1914 | } | ||
1915 | |||
1916 | dbg_printk(MTIP_DRV_NAME | ||
1917 | "%s: Completion: stat %x," | ||
1918 | "err %x, sect_cnt %x, lbalo %x," | ||
1919 | "lbamid %x, lbahi %x, dev %x\n", | ||
1920 | __func__, | ||
1921 | req_task->io_ports[7], | ||
1922 | req_task->io_ports[1], | ||
1923 | req_task->io_ports[2], | ||
1924 | req_task->io_ports[3], | ||
1925 | req_task->io_ports[4], | ||
1926 | req_task->io_ports[5], | ||
1927 | req_task->io_ports[6]); | ||
1928 | |||
1929 | if (taskout) { | ||
1930 | if (copy_to_user(buf + outtotal, outbuf, taskout)) { | ||
1931 | err = -EFAULT; | ||
1932 | goto abort; | ||
1933 | } | ||
1934 | } | ||
1935 | if (taskin) { | ||
1936 | if (copy_to_user(buf + intotal, inbuf, taskin)) { | ||
1937 | err = -EFAULT; | ||
1938 | goto abort; | ||
1939 | } | ||
1940 | } | ||
1941 | abort: | ||
1942 | if (inbuf_dma) | ||
1943 | pci_unmap_single(dd->pdev, inbuf_dma, | ||
1944 | taskin, DMA_FROM_DEVICE); | ||
1945 | if (outbuf_dma) | ||
1946 | pci_unmap_single(dd->pdev, outbuf_dma, | ||
1947 | taskout, DMA_TO_DEVICE); | ||
1948 | kfree(outbuf); | ||
1949 | kfree(inbuf); | ||
1950 | |||
1951 | return err; | ||
1952 | } | ||
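The taskfile path above follows the classic hdreg ABI: the user buffer begins with an ide_task_request_t header, outgoing sector data follows at buf + outtotal, and incoming data follows that (buf + intotal). A minimal userspace sketch under those assumptions; the helper name and the choice of IDENTIFY DEVICE are illustrative, not part of this driver:

#include <string.h>
#include <sys/ioctl.h>
#include <linux/hdreg.h>

/* Hypothetical helper: fetch one IDENTIFY DEVICE sector (0xEC) via
 * HDIO_DRIVE_TASKFILE. With out_size == 0 the in-data lands directly
 * after the request header, matching the intotal offset used above. */
static int rssd_identify(int fd, unsigned char id[512])
{
	unsigned char buf[sizeof(ide_task_request_t) + 512];
	ide_task_request_t *req = (ide_task_request_t *)buf;

	memset(buf, 0, sizeof(buf));
	req->io_ports[7] = 0xEC;	/* command register */
	req->io_ports[2] = 1;		/* one sector */
	req->data_phase = TASKFILE_IN;
	req->req_cmd = IDE_DRIVE_TASK_IN;
	req->in_size = 512;

	if (ioctl(fd, HDIO_DRIVE_TASKFILE, buf) < 0)
		return -1;
	memcpy(id, buf + sizeof(*req), 512);
	return 0;
}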
1953 | |||
1954 | /* | ||
1955 | * Handle IOCTL calls from the Block Layer. | ||
1956 | * | ||
1957 | * This function is called by the Block Layer when it receives an IOCTL | ||
1958 | * command that it does not understand. If the IOCTL command is not supported | ||
1959 | * this function returns -ENOTTY. | ||
1960 | * | ||
1961 | * @dd Pointer to the driver data structure. | ||
1962 | * @cmd IOCTL command passed from the Block Layer. | ||
1963 | * @arg IOCTL argument passed from the Block Layer. | ||
1964 | * | ||
1965 | * return value | ||
1966 | * 0 The IOCTL completed successfully. | ||
1967 | * -ENOTTY The specified command is not supported. | ||
1968 | * -EFAULT An error occurred copying data to a user space buffer. | ||
1969 | * -EIO An error occurred while executing the command. | ||
1970 | */ | ||
1971 | static int mtip_hw_ioctl(struct driver_data *dd, unsigned int cmd, | ||
1972 | unsigned long arg) | ||
1973 | { | ||
1974 | switch (cmd) { | ||
1975 | case HDIO_GET_IDENTITY: | ||
1976 | if (mtip_get_identify(dd->port, (void __user *) arg) < 0) { | ||
1977 | dev_warn(&dd->pdev->dev, | ||
1978 | "Unable to read identity\n"); | ||
1979 | return -EIO; | ||
1980 | } | ||
1981 | |||
1982 | break; | ||
1983 | case HDIO_DRIVE_CMD: | ||
1984 | { | ||
1985 | u8 drive_command[4]; | ||
1986 | |||
1987 | /* Copy the user command info to our buffer. */ | ||
1988 | if (copy_from_user(drive_command, | ||
1989 | (void __user *) arg, | ||
1990 | sizeof(drive_command))) | ||
1991 | return -EFAULT; | ||
1992 | |||
1993 | /* Execute the drive command. */ | ||
1994 | if (exec_drive_command(dd->port, | ||
1995 | drive_command, | ||
1996 | (void __user *) (arg+4))) | ||
1997 | return -EIO; | ||
1998 | |||
1999 | /* Copy the status back to the user's buffer. */ | ||
2000 | if (copy_to_user((void __user *) arg, | ||
2001 | drive_command, | ||
2002 | sizeof(drive_command))) | ||
2003 | return -EFAULT; | ||
2004 | |||
2005 | break; | ||
2006 | } | ||
2007 | case HDIO_DRIVE_TASK: | ||
2008 | { | ||
2009 | u8 drive_command[7]; | ||
2010 | |||
2011 | /* Copy the user command info to our buffer. */ | ||
2012 | if (copy_from_user(drive_command, | ||
2013 | (void __user *) arg, | ||
2014 | sizeof(drive_command))) | ||
2015 | return -EFAULT; | ||
2016 | |||
2017 | /* Execute the drive command. */ | ||
2018 | if (exec_drive_task(dd->port, drive_command)) | ||
2019 | return -EIO; | ||
2020 | |||
2021 | /* Copy the status back to the user's buffer. */ | ||
2022 | if (copy_to_user((void __user *) arg, | ||
2023 | drive_command, | ||
2024 | sizeof(drive_command))) | ||
2025 | return -EFAULT; | ||
2026 | |||
2027 | break; | ||
2028 | } | ||
2029 | case HDIO_DRIVE_TASKFILE: { | ||
2030 | ide_task_request_t req_task; | ||
2031 | int ret, outtotal; | ||
2032 | |||
2033 | if (copy_from_user(&req_task, (void __user *) arg, | ||
2034 | sizeof(req_task))) | ||
2035 | return -EFAULT; | ||
2036 | |||
2037 | outtotal = sizeof(req_task); | ||
2038 | |||
2039 | ret = exec_drive_taskfile(dd, (void __user *) arg, | ||
2040 | &req_task, outtotal); | ||
2041 | |||
2042 | if (copy_to_user((void __user *) arg, &req_task, | ||
2043 | sizeof(req_task))) | ||
2044 | return -EFAULT; | ||
2045 | |||
2046 | return ret; | ||
2047 | } | ||
2048 | |||
2049 | default: | ||
2050 | return -ENOTTY; | ||
2051 | } | ||
2052 | return 0; | ||
2053 | } | ||
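For HDIO_DRIVE_CMD the handler copies a 4-byte command block in, hands arg + 4 to exec_drive_command() as the data pointer, and copies the 4-byte status block back, i.e. the traditional hdparm-style convention. A hedged userspace sketch; the CHECK POWER MODE command and the meaning of args[2] come from the ATA standard, not from this driver:

#include <sys/ioctl.h>
#include <linux/hdreg.h>

/* Hypothetical: issue CHECK POWER MODE (0xE5). On return the driver
 * has overwritten args[0..3] with the completion status block. */
static int rssd_check_power(int fd)
{
	unsigned char args[4 + 512] = { 0xE5, 0, 0, 0 };

	if (ioctl(fd, HDIO_DRIVE_CMD, args) < 0)
		return -1;
	return args[2];		/* sector count carries the power mode */
}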
2054 | |||
2055 | /* | ||
2056 | * Submit an IO to the hw | ||
2057 | * | ||
2058 | * This function is called by the block layer to issue an io | ||
2059 | * to the device. Upon completion, the callback function will | ||
2060 | * be called with the data parameter passed as the callback data. | ||
2061 | * | ||
2062 | * @dd Pointer to the driver data structure. | ||
2063 | * @start First sector of the transfer. | ||
2064 | * @nsect Number of sectors to transfer. | ||
2065 | * @nents Number of entries in the scatter list for this command. | ||
2066 | * @tag The tag of this command. | ||
2067 | * @callback Pointer to the function that should be called | ||
2068 | * when the transfer completes. | ||
2069 | * @data Callback data passed to the callback function | ||
2070 | * when the transfer completes. | ||
2071 | * @barrier If non-zero, this command must be completed before | ||
2072 | * issuing any other commands. | ||
2073 | * @dir Direction (read or write) | ||
2074 | * | ||
2075 | * return value | ||
2076 | * None | ||
2077 | */ | ||
2078 | static void mtip_hw_submit_io(struct driver_data *dd, sector_t start, | ||
2079 | int nsect, int nents, int tag, void *callback, | ||
2080 | void *data, int barrier, int dir) | ||
2081 | { | ||
2082 | struct host_to_dev_fis *fis; | ||
2083 | struct mtip_port *port = dd->port; | ||
2084 | struct mtip_cmd *command = &port->commands[tag]; | ||
2085 | |||
2086 | /* Map the scatter list for DMA access */ | ||
2087 | if (dir == READ) | ||
2088 | nents = dma_map_sg(&dd->pdev->dev, command->sg, | ||
2089 | nents, DMA_FROM_DEVICE); | ||
2090 | else | ||
2091 | nents = dma_map_sg(&dd->pdev->dev, command->sg, | ||
2092 | nents, DMA_TO_DEVICE); | ||
2093 | |||
2094 | command->scatter_ents = nents; | ||
2095 | |||
2096 | /* | ||
2097 | * The number of retries for this command before it is | ||
2098 | * reported as a failure to the upper layers. | ||
2099 | */ | ||
2100 | command->retries = MTIP_MAX_RETRIES; | ||
2101 | |||
2102 | /* Fill out fis */ | ||
2103 | fis = command->command; | ||
2104 | fis->type = 0x27; /* host to device register FIS */ | ||
2105 | fis->opts = 1 << 7; /* C bit: FIS carries a new command */ | ||
2106 | fis->command = | ||
2107 | (dir == READ ? ATA_CMD_FPDMA_READ : ATA_CMD_FPDMA_WRITE); | ||
2108 | *((unsigned int *) &fis->lba_low) = (start & 0xFFFFFF); /* LBA 23:0 */ | ||
2109 | *((unsigned int *) &fis->lba_low_ex) = ((start >> 24) & 0xFFFFFF); /* LBA 47:24 */ | ||
2110 | fis->device = 1 << 6; | ||
2111 | if (barrier) | ||
2112 | fis->device |= FUA_BIT; | ||
2113 | fis->features = nsect & 0xFF; | ||
2114 | fis->features_ex = (nsect >> 8) & 0xFF; | ||
2115 | fis->sect_count = ((tag << 3) | (tag >> 5)); /* NCQ tag in bits 7:3 */ | ||
2116 | fis->sect_cnt_ex = 0; | ||
2117 | fis->control = 0; | ||
2118 | fis->res2 = 0; | ||
2119 | fis->res3 = 0; | ||
2120 | fill_command_sg(dd, command, nents); | ||
2121 | |||
2122 | /* Populate the command header */ | ||
2123 | command->command_header->opts = | ||
2124 | __force_bit2int cpu_to_le32( | ||
2125 | (nents << 16) | 5 | AHCI_CMD_PREFETCH); | ||
2126 | command->command_header->byte_count = 0; | ||
2127 | |||
2128 | /* | ||
2129 | * Set the completion function and data for the command | ||
2130 | * within this layer. | ||
2131 | */ | ||
2132 | command->comp_data = dd; | ||
2133 | command->comp_func = mtip_async_complete; | ||
2134 | command->direction = (dir == READ ? DMA_FROM_DEVICE : DMA_TO_DEVICE); | ||
2135 | |||
2136 | /* | ||
2137 | * Set the completion function and data for the command passed | ||
2138 | * from the upper layer. | ||
2139 | */ | ||
2140 | command->async_data = data; | ||
2141 | command->async_callback = callback; | ||
2142 | |||
2143 | /* | ||
2144 | * Do not issue this command now if an internal command is in | ||
2145 | * progress or error handling is active; queue it instead. | ||
2146 | */ | ||
2147 | if (unlikely(test_bit(MTIP_FLAG_IC_ACTIVE_BIT, &port->flags) || | ||
2148 | test_bit(MTIP_FLAG_EH_ACTIVE_BIT, &port->flags))) { | ||
2149 | set_bit(tag, port->cmds_to_issue); | ||
2150 | set_bit(MTIP_FLAG_ISSUE_CMDS_BIT, &port->flags); | ||
2151 | return; | ||
2152 | } | ||
2153 | |||
2154 | /* Issue the command to the hardware */ | ||
2155 | mtip_issue_ncq_command(port, tag); | ||
2156 | |||
2157 | /* Set the command's timeout value.*/ | ||
2158 | port->commands[tag].comp_time = jiffies + msecs_to_jiffies( | ||
2159 | MTIP_NCQ_COMMAND_TIMEOUT_MS); | ||
2160 | } | ||
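The FIS packing above splits the 48-bit starting LBA into two 24-bit halves (written through the lba_low/mid/hi and lba_low_ex/mid_ex/hi_ex byte triplets) and places the NCQ tag in bits 7:3 of sect_count, as FPDMA READ/WRITE commands require. A standalone sketch of the arithmetic, with example values only:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t start = 0x0123456789ULL;	/* example 48-bit LBA */
	unsigned int tag = 5;			/* example NCQ tag */

	printf("lba[23:0]  = 0x%06llx\n",
	       (unsigned long long)(start & 0xFFFFFF));		/* 0x456789 */
	printf("lba[47:24] = 0x%06llx\n",
	       (unsigned long long)((start >> 24) & 0xFFFFFF));	/* 0x000123 */
	printf("sect_count = 0x%02x\n", (tag << 3) | (tag >> 5));	/* 0x28 */
	return 0;
}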
2161 | |||
2162 | /* | ||
2163 | * Release a command slot. | ||
2164 | * | ||
2165 | * @dd Pointer to the driver data structure. | ||
2166 | * @tag Slot tag | ||
2167 | * | ||
2168 | * return value | ||
2169 | * None | ||
2170 | */ | ||
2171 | static void mtip_hw_release_scatterlist(struct driver_data *dd, int tag) | ||
2172 | { | ||
2173 | release_slot(dd->port, tag); | ||
2174 | } | ||
2175 | |||
2176 | /* | ||
2177 | * Obtain a command slot and return its associated scatter list. | ||
2178 | * | ||
2179 | * @dd Pointer to the driver data structure. | ||
2180 | * @tag Pointer to an int that will receive the allocated command | ||
2181 | * slot tag. | ||
2182 | * | ||
2183 | * return value | ||
2184 | * Pointer to the scatter list for the allocated command slot | ||
2185 | * or NULL if no command slots are available. | ||
2186 | */ | ||
2187 | static struct scatterlist *mtip_hw_get_scatterlist(struct driver_data *dd, | ||
2188 | int *tag) | ||
2189 | { | ||
2190 | /* | ||
2191 | * Even with this semaphore, the slot bitmap search in get_slot() | ||
2192 | * can race with other threads and transiently find no free slot, | ||
2193 | * so the return value of get_slot() must still be checked. | ||
2194 | */ | ||
2195 | down(&dd->port->cmd_slot); | ||
2196 | *tag = get_slot(dd->port); | ||
2197 | |||
2198 | if (unlikely(*tag < 0)) | ||
2199 | return NULL; | ||
2200 | |||
2201 | return dd->port->commands[*tag].sg; | ||
2202 | } | ||
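The counting semaphore bounds how many threads can hold slots, but as the comment notes the bitmap search in get_slot() can still lose a race, so every caller must handle a NULL return. This is exactly the pattern mtip_make_request() uses later in this file; condensed:

	int tag;
	struct scatterlist *sg;

	sg = mtip_hw_get_scatterlist(dd, &tag);
	if (unlikely(!sg)) {
		/* No slot despite the semaphore: fail the bio upward. */
		bio_io_error(bio);
		return;
	}
	/* ... build the scatter list, then mtip_hw_submit_io(..., tag, ...) */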
2203 | |||
2204 | /* | ||
2205 | * Sysfs register/status dump. | ||
2206 | * | ||
2207 | * @dev Pointer to the device structure, passed by the kernel. | ||
2208 | * @attr Pointer to the device_attribute structure passed by the kernel. | ||
2209 | * @buf Pointer to the char buffer that will receive the stats info. | ||
2210 | * | ||
2211 | * return value | ||
2212 | * The size, in bytes, of the data copied into buf. | ||
2213 | */ | ||
2214 | static ssize_t hw_show_registers(struct device *dev, | ||
2215 | struct device_attribute *attr, | ||
2216 | char *buf) | ||
2217 | { | ||
2218 | u32 group_allocated; | ||
2219 | struct driver_data *dd = dev_to_disk(dev)->private_data; | ||
2220 | int size = 0; | ||
2221 | int n; | ||
2222 | |||
2223 | size += sprintf(&buf[size], "%s:\ns_active:\n", __func__); | ||
2224 | |||
2225 | for (n = 0; n < dd->slot_groups; n++) | ||
2226 | size += sprintf(&buf[size], "0x%08x\n", | ||
2227 | readl(dd->port->s_active[n])); | ||
2228 | |||
2229 | size += sprintf(&buf[size], "Command Issue:\n"); | ||
2230 | |||
2231 | for (n = 0; n < dd->slot_groups; n++) | ||
2232 | size += sprintf(&buf[size], "0x%08x\n", | ||
2233 | readl(dd->port->cmd_issue[n])); | ||
2234 | |||
2235 | size += sprintf(&buf[size], "Allocated:\n"); | ||
2236 | |||
2237 | for (n = 0; n < dd->slot_groups; n++) { | ||
2238 | if (sizeof(long) > sizeof(u32)) | ||
2239 | group_allocated = | ||
2240 | dd->port->allocated[n/2] >> (32*(n&1)); | ||
2241 | else | ||
2242 | group_allocated = dd->port->allocated[n]; | ||
2243 | size += sprintf(&buf[size], "0x%08x\n", | ||
2244 | group_allocated); | ||
2245 | } | ||
2246 | |||
2247 | size += sprintf(&buf[size], "completed:\n"); | ||
2248 | |||
2249 | for (n = 0; n < dd->slot_groups; n++) | ||
2250 | size += sprintf(&buf[size], "0x%08x\n", | ||
2251 | readl(dd->port->completed[n])); | ||
2252 | |||
2253 | size += sprintf(&buf[size], "PORT_IRQ_STAT 0x%08x\n", | ||
2254 | readl(dd->port->mmio + PORT_IRQ_STAT)); | ||
2255 | size += sprintf(&buf[size], "HOST_IRQ_STAT 0x%08x\n", | ||
2256 | readl(dd->mmio + HOST_IRQ_STAT)); | ||
2257 | |||
2258 | return size; | ||
2259 | } | ||
2260 | static DEVICE_ATTR(registers, S_IRUGO, hw_show_registers, NULL); | ||
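Since the attribute is created on the gendisk's kobject (see mtip_hw_sysfs_init() below), the dump should appear as a read-only file under the disk's sysfs directory, e.g. /sys/block/rssda/registers. The "Allocated" loop also shows how two 32-slot groups share one word when unsigned long is 64 bits; the same slice, factored out as a sketch:

/* Sketch: per-group view of the allocation bitmap when two 32-bit
 * slot groups are packed into each 64-bit unsigned long. */
static inline u32 slot_group_bits(const unsigned long *allocated, int n)
{
	if (sizeof(long) > sizeof(u32))
		return allocated[n / 2] >> (32 * (n & 1));
	return allocated[n];
}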
2261 | |||
2262 | /* | ||
2263 | * Create the sysfs related attributes. | ||
2264 | * | ||
2265 | * @dd Pointer to the driver data structure. | ||
2266 | * @kobj Pointer to the kobj for the block device. | ||
2267 | * | ||
2268 | * return value | ||
2269 | * 0 Operation completed successfully. | ||
2270 | * -EINVAL Invalid parameter. | ||
2271 | */ | ||
2272 | static int mtip_hw_sysfs_init(struct driver_data *dd, struct kobject *kobj) | ||
2273 | { | ||
2274 | if (!kobj || !dd) | ||
2275 | return -EINVAL; | ||
2276 | |||
2277 | if (sysfs_create_file(kobj, &dev_attr_registers.attr)) | ||
2278 | dev_warn(&dd->pdev->dev, | ||
2279 | "Error creating registers sysfs entry\n"); | ||
2280 | return 0; | ||
2281 | } | ||
2282 | |||
2283 | /* | ||
2284 | * Remove the sysfs related attributes. | ||
2285 | * | ||
2286 | * @dd Pointer to the driver data structure. | ||
2287 | * @kobj Pointer to the kobj for the block device. | ||
2288 | * | ||
2289 | * return value | ||
2290 | * 0 Operation completed successfully. | ||
2291 | * -EINVAL Invalid parameter. | ||
2292 | */ | ||
2293 | static int mtip_hw_sysfs_exit(struct driver_data *dd, struct kobject *kobj) | ||
2294 | { | ||
2295 | if (!kobj || !dd) | ||
2296 | return -EINVAL; | ||
2297 | |||
2298 | sysfs_remove_file(kobj, &dev_attr_registers.attr); | ||
2299 | |||
2300 | return 0; | ||
2301 | } | ||
2302 | |||
2303 | /* | ||
2304 | * Perform any init/resume time hardware setup | ||
2305 | * | ||
2306 | * @dd Pointer to the driver data structure. | ||
2307 | * | ||
2308 | * return value | ||
2309 | * None | ||
2310 | */ | ||
2311 | static inline void hba_setup(struct driver_data *dd) | ||
2312 | { | ||
2313 | u32 hwdata; | ||
2314 | hwdata = readl(dd->mmio + HOST_HSORG); | ||
2315 | |||
2316 | /* interrupt bug workaround: use only 1 IS bit.*/ | ||
2317 | writel(hwdata | | ||
2318 | HSORG_DISABLE_SLOTGRP_INTR | | ||
2319 | HSORG_DISABLE_SLOTGRP_PXIS, | ||
2320 | dd->mmio + HOST_HSORG); | ||
2321 | } | ||
2322 | |||
2323 | /* | ||
2324 | * Detect the details of the product, and store anything needed | ||
2325 | * into the driver data structure. This includes the product type and | ||
2326 | * version, and the number of slot groups. | ||
2327 | * | ||
2328 | * @dd Pointer to the driver data structure. | ||
2329 | * | ||
2330 | * return value | ||
2331 | * None | ||
2332 | */ | ||
2333 | static void mtip_detect_product(struct driver_data *dd) | ||
2334 | { | ||
2335 | u32 hwdata; | ||
2336 | unsigned int rev, slotgroups; | ||
2337 | |||
2338 | /* | ||
2339 | * HBA base + 0xFC [15:0] - vendor-specific hardware interface | ||
2340 | * info register: | ||
2341 | * [15:8] hardware/software interface rev# | ||
2342 | * [ 3] asic-style interface | ||
2343 | * [ 2:0] number of slot groups, minus 1 (only valid for asic-style). | ||
2344 | */ | ||
2345 | hwdata = readl(dd->mmio + HOST_HSORG); | ||
2346 | |||
2347 | dd->product_type = MTIP_PRODUCT_UNKNOWN; | ||
2348 | dd->slot_groups = 1; | ||
2349 | |||
2350 | if (hwdata & 0x8) { | ||
2351 | dd->product_type = MTIP_PRODUCT_ASICFPGA; | ||
2352 | rev = (hwdata & HSORG_HWREV) >> 8; | ||
2353 | slotgroups = (hwdata & HSORG_SLOTGROUPS) + 1; | ||
2354 | dev_info(&dd->pdev->dev, | ||
2355 | "ASIC-FPGA design, HS rev 0x%x, " | ||
2356 | "%i slot groups [%i slots]\n", | ||
2357 | rev, | ||
2358 | slotgroups, | ||
2359 | slotgroups * 32); | ||
2360 | |||
2361 | if (slotgroups > MTIP_MAX_SLOT_GROUPS) { | ||
2362 | dev_warn(&dd->pdev->dev, | ||
2363 | "Warning: driver only supports " | ||
2364 | "%i slot groups.\n", MTIP_MAX_SLOT_GROUPS); | ||
2365 | slotgroups = MTIP_MAX_SLOT_GROUPS; | ||
2366 | } | ||
2367 | dd->slot_groups = slotgroups; | ||
2368 | return; | ||
2369 | } | ||
2370 | |||
2371 | dev_warn(&dd->pdev->dev, "Unrecognized product id\n"); | ||
2372 | } | ||
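Given the register layout documented in the comment above, decoding a sample HOST_HSORG readout looks like the sketch below. The literal masks are assumptions consistent with that layout; the driver itself uses the HSORG_HWREV and HSORG_SLOTGROUPS macros:

static void decode_hsorg_example(u32 hwdata)	/* e.g. hwdata = 0x210f */
{
	if (hwdata & 0x8) {				/* bit 3: asic-style */
		unsigned int rev = (hwdata & 0xff00) >> 8;	/* -> 0x21 */
		unsigned int slotgroups = (hwdata & 0x7) + 1;	/* -> 8 */
		/* 8 slot groups -> 8 * 32 = 256 command slots */
	}
}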
2373 | |||
2374 | /* | ||
2375 | * Blocking wait for FTL rebuild to complete | ||
2376 | * | ||
2377 | * @dd Pointer to the driver data structure. | ||
2378 | * | ||
2379 | * return value | ||
2380 | * 0 FTL rebuild completed successfully | ||
2381 | * -EFAULT FTL rebuild error/timeout/interruption | ||
2382 | */ | ||
2383 | static int mtip_ftl_rebuild_poll(struct driver_data *dd) | ||
2384 | { | ||
2385 | unsigned long timeout, cnt = 0, start; | ||
2386 | |||
2387 | dev_warn(&dd->pdev->dev, | ||
2388 | "FTL rebuild in progress. Polling for completion.\n"); | ||
2389 | |||
2390 | start = jiffies; | ||
2391 | dd->ftlrebuildflag = 1; | ||
2392 | timeout = jiffies + msecs_to_jiffies(MTIP_FTL_REBUILD_TIMEOUT_MS); | ||
2393 | |||
2394 | do { | ||
2395 | if (mtip_check_surprise_removal(dd->pdev)) | ||
2396 | return -EFAULT; | ||
2397 | |||
2398 | if (mtip_get_identify(dd->port, NULL) < 0) | ||
2399 | return -EFAULT; | ||
2400 | |||
2401 | if (*(dd->port->identify + MTIP_FTL_REBUILD_OFFSET) == | ||
2402 | MTIP_FTL_REBUILD_MAGIC) { | ||
2403 | ssleep(1); | ||
2404 | /* Print a progress message periodically */ | ||
2405 | if (cnt++ >= 180) { | ||
2406 | dev_warn(&dd->pdev->dev, | ||
2407 | "FTL rebuild in progress (%d secs).\n", | ||
2408 | jiffies_to_msecs(jiffies - start) / 1000); | ||
2409 | cnt = 0; | ||
2410 | } | ||
2411 | } else { | ||
2412 | dev_warn(&dd->pdev->dev, | ||
2413 | "FTL rebuild complete (%d secs).\n", | ||
2414 | jiffies_to_msecs(jiffies - start) / 1000); | ||
2415 | dd->ftlrebuildflag = 0; | ||
2416 | mtip_block_initialize(dd); | ||
2417 | break; | ||
2418 | } | ||
2419 | ssleep(10); | ||
2420 | } while (time_before(jiffies, timeout)); | ||
2421 | |||
2422 | /* Check for timeout */ | ||
2423 | if (dd->ftlrebuildflag) { | ||
2424 | dev_err(&dd->pdev->dev, | ||
2425 | "Timed out waiting for FTL rebuild to complete (%d secs).\n", | ||
2426 | jiffies_to_msecs(jiffies - start) / 1000); | ||
2427 | return -EFAULT; | ||
2428 | } | ||
2429 | |||
2430 | return 0; | ||
2431 | } | ||
2432 | |||
2433 | /* | ||
2434 | * service thread to issue queued commands | ||
2435 | * | ||
2436 | * @data Pointer to the driver data structure. | ||
2437 | * | ||
2438 | * return value | ||
2439 | * 0 | ||
2440 | */ | ||
2441 | |||
2442 | static int mtip_service_thread(void *data) | ||
2443 | { | ||
2444 | struct driver_data *dd = (struct driver_data *)data; | ||
2445 | unsigned long slot, slot_start, slot_wrap; | ||
2446 | unsigned int num_cmd_slots = dd->slot_groups * 32; | ||
2447 | struct mtip_port *port = dd->port; | ||
2448 | |||
2449 | while (1) { | ||
2450 | /* | ||
2451 | * Sleep until woken, and only proceed when neither an internal | ||
2452 | * command is in progress nor error handling is active. | ||
2453 | */ | ||
2454 | wait_event_interruptible(port->svc_wait, (port->flags) && | ||
2455 | !test_bit(MTIP_FLAG_IC_ACTIVE_BIT, &port->flags) && | ||
2456 | !test_bit(MTIP_FLAG_EH_ACTIVE_BIT, &port->flags)); | ||
2457 | |||
2458 | if (kthread_should_stop()) | ||
2459 | break; | ||
2460 | |||
2461 | set_bit(MTIP_FLAG_SVC_THD_ACTIVE_BIT, &port->flags); | ||
2462 | if (test_bit(MTIP_FLAG_ISSUE_CMDS_BIT, &port->flags)) { | ||
2463 | slot = 1; | ||
2464 | /* used to restrict the loop to one full pass over the slots */ | ||
2465 | slot_start = num_cmd_slots; | ||
2466 | slot_wrap = 0; | ||
2467 | while (1) { | ||
2468 | slot = find_next_bit(port->cmds_to_issue, | ||
2469 | num_cmd_slots, slot); | ||
2470 | if (slot_wrap == 1) { | ||
2471 | if ((slot_start >= slot) || | ||
2472 | (slot >= num_cmd_slots)) | ||
2473 | break; | ||
2474 | } | ||
2475 | if (unlikely(slot_start == num_cmd_slots)) | ||
2476 | slot_start = slot; | ||
2477 | |||
2478 | if (unlikely(slot == num_cmd_slots)) { | ||
2479 | slot = 1; | ||
2480 | slot_wrap = 1; | ||
2481 | continue; | ||
2482 | } | ||
2483 | |||
2484 | /* Issue the command to the hardware */ | ||
2485 | mtip_issue_ncq_command(port, slot); | ||
2486 | |||
2487 | /* Set the command's timeout value.*/ | ||
2488 | port->commands[slot].comp_time = jiffies + | ||
2489 | msecs_to_jiffies(MTIP_NCQ_COMMAND_TIMEOUT_MS); | ||
2490 | |||
2491 | clear_bit(slot, port->cmds_to_issue); | ||
2492 | } | ||
2493 | |||
2494 | clear_bit(MTIP_FLAG_ISSUE_CMDS_BIT, &port->flags); | ||
2495 | } else if (test_bit(MTIP_FLAG_REBUILD_BIT, &port->flags)) { | ||
2496 | mtip_ftl_rebuild_poll(dd); | ||
2497 | clear_bit(MTIP_FLAG_REBUILD_BIT, &port->flags); | ||
2498 | } | ||
2499 | clear_bit(MTIP_FLAG_SVC_THD_ACTIVE_BIT, &port->flags); | ||
2500 | |||
2501 | if (test_bit(MTIP_FLAG_SVC_THD_SHOULD_STOP_BIT, &port->flags)) | ||
2502 | break; | ||
2503 | } | ||
2504 | return 0; | ||
2505 | } | ||
2506 | |||
2507 | /* | ||
2508 | * Called once for each card. | ||
2509 | * | ||
2510 | * @dd Pointer to the driver data structure. | ||
2511 | * | ||
2512 | * return value | ||
2513 | * 0 on success, else an error code. | ||
2514 | */ | ||
2515 | static int mtip_hw_init(struct driver_data *dd) | ||
2516 | { | ||
2517 | int i; | ||
2518 | int rv; | ||
2519 | unsigned int num_command_slots; | ||
2520 | |||
2521 | dd->mmio = pcim_iomap_table(dd->pdev)[MTIP_ABAR]; | ||
2522 | |||
2523 | mtip_detect_product(dd); | ||
2524 | if (dd->product_type == MTIP_PRODUCT_UNKNOWN) { | ||
2525 | rv = -EIO; | ||
2526 | goto out1; | ||
2527 | } | ||
2528 | num_command_slots = dd->slot_groups * 32; | ||
2529 | |||
2530 | hba_setup(dd); | ||
2531 | |||
2532 | tasklet_init(&dd->tasklet, mtip_tasklet, (unsigned long)dd); | ||
2533 | |||
2534 | dd->port = kzalloc(sizeof(struct mtip_port), GFP_KERNEL); | ||
2535 | if (!dd->port) { | ||
2536 | dev_err(&dd->pdev->dev, | ||
2537 | "Memory allocation: port structure\n"); | ||
2538 | return -ENOMEM; | ||
2539 | } | ||
2540 | |||
2541 | /* Counting semaphore to track command slot usage */ | ||
2542 | sema_init(&dd->port->cmd_slot, num_command_slots - 1); | ||
2543 | |||
2544 | /* Spinlock to prevent concurrent issue */ | ||
2545 | spin_lock_init(&dd->port->cmd_issue_lock); | ||
2546 | |||
2547 | /* Set the port mmio base address. */ | ||
2548 | dd->port->mmio = dd->mmio + PORT_OFFSET; | ||
2549 | dd->port->dd = dd; | ||
2550 | |||
2551 | /* Allocate memory for the command list. */ | ||
2552 | dd->port->command_list = | ||
2553 | dmam_alloc_coherent(&dd->pdev->dev, | ||
2554 | HW_PORT_PRIV_DMA_SZ + (ATA_SECT_SIZE * 2), | ||
2555 | &dd->port->command_list_dma, | ||
2556 | GFP_KERNEL); | ||
2557 | if (!dd->port->command_list) { | ||
2558 | dev_err(&dd->pdev->dev, | ||
2559 | "Memory allocation: command list\n"); | ||
2560 | rv = -ENOMEM; | ||
2561 | goto out1; | ||
2562 | } | ||
2563 | |||
2564 | /* Clear the memory we have allocated. */ | ||
2565 | memset(dd->port->command_list, | ||
2566 | 0, | ||
2567 | HW_PORT_PRIV_DMA_SZ + (ATA_SECT_SIZE * 2)); | ||
2568 | |||
2569 | /* Setup the address of the RX FIS. */ | ||
2570 | dd->port->rxfis = dd->port->command_list + HW_CMD_SLOT_SZ; | ||
2571 | dd->port->rxfis_dma = dd->port->command_list_dma + HW_CMD_SLOT_SZ; | ||
2572 | |||
2573 | /* Setup the address of the command tables. */ | ||
2574 | dd->port->command_table = dd->port->rxfis + AHCI_RX_FIS_SZ; | ||
2575 | dd->port->command_tbl_dma = dd->port->rxfis_dma + AHCI_RX_FIS_SZ; | ||
2576 | |||
2577 | /* Setup the address of the identify data. */ | ||
2578 | dd->port->identify = dd->port->command_table + | ||
2579 | HW_CMD_TBL_AR_SZ; | ||
2580 | dd->port->identify_dma = dd->port->command_tbl_dma + | ||
2581 | HW_CMD_TBL_AR_SZ; | ||
2582 | |||
2583 | /* Setup the address of the sector buffer. */ | ||
2584 | dd->port->sector_buffer = (void *) dd->port->identify + ATA_SECT_SIZE; | ||
2585 | dd->port->sector_buffer_dma = dd->port->identify_dma + ATA_SECT_SIZE; | ||
2586 | |||
2587 | /* Point the command headers at the command tables. */ | ||
2588 | for (i = 0; i < num_command_slots; i++) { | ||
2589 | dd->port->commands[i].command_header = | ||
2590 | dd->port->command_list + | ||
2591 | (sizeof(struct mtip_cmd_hdr) * i); | ||
2592 | dd->port->commands[i].command_header_dma = | ||
2593 | dd->port->command_list_dma + | ||
2594 | (sizeof(struct mtip_cmd_hdr) * i); | ||
2595 | |||
2596 | dd->port->commands[i].command = | ||
2597 | dd->port->command_table + (HW_CMD_TBL_SZ * i); | ||
2598 | dd->port->commands[i].command_dma = | ||
2599 | dd->port->command_tbl_dma + (HW_CMD_TBL_SZ * i); | ||
2600 | |||
2601 | if (readl(dd->mmio + HOST_CAP) & HOST_CAP_64) | ||
2602 | dd->port->commands[i].command_header->ctbau = | ||
2603 | __force_bit2int cpu_to_le32( | ||
2604 | (dd->port->commands[i].command_dma >> 16) >> 16); | ||
2605 | dd->port->commands[i].command_header->ctba = | ||
2606 | __force_bit2int cpu_to_le32( | ||
2607 | dd->port->commands[i].command_dma & 0xFFFFFFFF); | ||
2608 | |||
2609 | /* | ||
2610 | * If this is not done, a bug is reported by the stock | ||
2611 | * FC11 i386 kernel, which has lots of kernel debugging | ||
2612 | * enabled. | ||
2613 | */ | ||
2614 | sg_init_table(dd->port->commands[i].sg, MTIP_MAX_SG); | ||
2615 | |||
2616 | /* Mark all commands as currently inactive.*/ | ||
2617 | atomic_set(&dd->port->commands[i].active, 0); | ||
2618 | } | ||
2619 | |||
2620 | /* Setup the pointers to the extended s_active and CI registers. */ | ||
2621 | for (i = 0; i < dd->slot_groups; i++) { | ||
2622 | dd->port->s_active[i] = | ||
2623 | dd->port->mmio + i*0x80 + PORT_SCR_ACT; | ||
2624 | dd->port->cmd_issue[i] = | ||
2625 | dd->port->mmio + i*0x80 + PORT_COMMAND_ISSUE; | ||
2626 | dd->port->completed[i] = | ||
2627 | dd->port->mmio + i*0x80 + PORT_SDBV; | ||
2628 | } | ||
2629 | |||
2630 | /* Reset the HBA. */ | ||
2631 | if (mtip_hba_reset(dd) < 0) { | ||
2632 | dev_err(&dd->pdev->dev, | ||
2633 | "Card did not reset within timeout\n"); | ||
2634 | rv = -EIO; | ||
2635 | goto out2; | ||
2636 | } | ||
2637 | |||
2638 | mtip_init_port(dd->port); | ||
2639 | mtip_start_port(dd->port); | ||
2640 | |||
2641 | /* Setup the ISR and enable interrupts. */ | ||
2642 | rv = devm_request_irq(&dd->pdev->dev, | ||
2643 | dd->pdev->irq, | ||
2644 | mtip_irq_handler, | ||
2645 | IRQF_SHARED, | ||
2646 | dev_driver_string(&dd->pdev->dev), | ||
2647 | dd); | ||
2648 | |||
2649 | if (rv) { | ||
2650 | dev_err(&dd->pdev->dev, | ||
2651 | "Unable to allocate IRQ %d\n", dd->pdev->irq); | ||
2652 | goto out2; | ||
2653 | } | ||
2654 | |||
2655 | /* Enable interrupts on the HBA. */ | ||
2656 | writel(readl(dd->mmio + HOST_CTL) | HOST_IRQ_EN, | ||
2657 | dd->mmio + HOST_CTL); | ||
2658 | |||
2659 | init_timer(&dd->port->cmd_timer); | ||
2660 | init_waitqueue_head(&dd->port->svc_wait); | ||
2661 | |||
2662 | dd->port->cmd_timer.data = (unsigned long int) dd->port; | ||
2663 | dd->port->cmd_timer.function = mtip_timeout_function; | ||
2664 | mod_timer(&dd->port->cmd_timer, | ||
2665 | jiffies + msecs_to_jiffies(MTIP_TIMEOUT_CHECK_PERIOD)); | ||
2666 | |||
2667 | if (mtip_get_identify(dd->port, NULL) < 0) { | ||
2668 | rv = -EFAULT; | ||
2669 | goto out3; | ||
2670 | } | ||
2671 | |||
2672 | if (*(dd->port->identify + MTIP_FTL_REBUILD_OFFSET) == | ||
2673 | MTIP_FTL_REBUILD_MAGIC) { | ||
2674 | set_bit(MTIP_FLAG_REBUILD_BIT, &dd->port->flags); | ||
2675 | return MTIP_FTL_REBUILD_MAGIC; | ||
2676 | } | ||
2677 | mtip_dump_identify(dd->port); | ||
2678 | return rv; | ||
2679 | |||
2680 | out3: | ||
2681 | del_timer_sync(&dd->port->cmd_timer); | ||
2682 | |||
2683 | /* Disable interrupts on the HBA. */ | ||
2684 | writel(readl(dd->mmio + HOST_CTL) & ~HOST_IRQ_EN, | ||
2685 | dd->mmio + HOST_CTL); | ||
2686 | |||
2687 | /* Release the IRQ. */ | ||
2688 | devm_free_irq(&dd->pdev->dev, dd->pdev->irq, dd); | ||
2689 | |||
2690 | out2: | ||
2691 | mtip_deinit_port(dd->port); | ||
2692 | |||
2693 | /* Free the command/command header memory. */ | ||
2694 | dmam_free_coherent(&dd->pdev->dev, | ||
2695 | HW_PORT_PRIV_DMA_SZ + (ATA_SECT_SIZE * 2), | ||
2696 | dd->port->command_list, | ||
2697 | dd->port->command_list_dma); | ||
2698 | out1: | ||
2699 | /* Free the memory allocated for the port structure. */ | ||
2700 | kfree(dd->port); | ||
2701 | |||
2702 | return rv; | ||
2703 | } | ||
2704 | |||
2705 | /* | ||
2706 | * Called to deinitialize an interface. | ||
2707 | * | ||
2708 | * @dd Pointer to the driver data structure. | ||
2709 | * | ||
2710 | * return value | ||
2711 | * 0 | ||
2712 | */ | ||
2713 | static int mtip_hw_exit(struct driver_data *dd) | ||
2714 | { | ||
2715 | /* | ||
2716 | * Send standby immediate (E0h) to the drive so that it | ||
2717 | * saves its state. | ||
2718 | */ | ||
2719 | if (atomic_read(&dd->drv_cleanup_done) != true) { | ||
2720 | |||
2721 | mtip_standby_immediate(dd->port); | ||
2722 | |||
2723 | /* de-initialize the port. */ | ||
2724 | mtip_deinit_port(dd->port); | ||
2725 | |||
2726 | /* Disable interrupts on the HBA. */ | ||
2727 | writel(readl(dd->mmio + HOST_CTL) & ~HOST_IRQ_EN, | ||
2728 | dd->mmio + HOST_CTL); | ||
2729 | } | ||
2730 | |||
2731 | del_timer_sync(&dd->port->cmd_timer); | ||
2732 | |||
2733 | /* Release the IRQ. */ | ||
2734 | devm_free_irq(&dd->pdev->dev, dd->pdev->irq, dd); | ||
2735 | |||
2736 | /* Stop the bottom half tasklet. */ | ||
2737 | tasklet_kill(&dd->tasklet); | ||
2738 | |||
2739 | /* Free the command/command header memory. */ | ||
2740 | dmam_free_coherent(&dd->pdev->dev, | ||
2741 | HW_PORT_PRIV_DMA_SZ + (ATA_SECT_SIZE * 2), | ||
2742 | dd->port->command_list, | ||
2743 | dd->port->command_list_dma); | ||
2744 | /* Free the memory allocated for the port structure. */ | ||
2745 | kfree(dd->port); | ||
2746 | |||
2747 | return 0; | ||
2748 | } | ||
2749 | |||
2750 | /* | ||
2751 | * Issue a Standby Immediate command to the device. | ||
2752 | * | ||
2753 | * This function is called by the Block Layer just before the | ||
2754 | * system powers off during a shutdown. | ||
2755 | * | ||
2756 | * @dd Pointer to the driver data structure. | ||
2757 | * | ||
2758 | * return value | ||
2759 | * 0 | ||
2760 | */ | ||
2761 | static int mtip_hw_shutdown(struct driver_data *dd) | ||
2762 | { | ||
2763 | /* | ||
2764 | * Send standby immediate (E0h) to the drive so that it | ||
2765 | * saves its state. | ||
2766 | */ | ||
2767 | mtip_standby_immediate(dd->port); | ||
2768 | |||
2769 | return 0; | ||
2770 | } | ||
2771 | |||
2772 | /* | ||
2773 | * Suspend function | ||
2774 | * | ||
2775 | * This function is called by the Block Layer just before the | ||
2776 | * system hibernates. | ||
2777 | * | ||
2778 | * @dd Pointer to the driver data structure. | ||
2779 | * | ||
2780 | * return value | ||
2781 | * 0 Suspend was successful | ||
2782 | * -EFAULT Suspend was not successful | ||
2783 | */ | ||
2784 | static int mtip_hw_suspend(struct driver_data *dd) | ||
2785 | { | ||
2786 | /* | ||
2787 | * Send standby immediate (E0h) to the drive | ||
2788 | * so that it saves its state. | ||
2789 | */ | ||
2790 | if (mtip_standby_immediate(dd->port) != 0) { | ||
2791 | dev_err(&dd->pdev->dev, | ||
2792 | "Failed standby-immediate command\n"); | ||
2793 | return -EFAULT; | ||
2794 | } | ||
2795 | |||
2796 | /* Disable interrupts on the HBA.*/ | ||
2797 | writel(readl(dd->mmio + HOST_CTL) & ~HOST_IRQ_EN, | ||
2798 | dd->mmio + HOST_CTL); | ||
2799 | mtip_deinit_port(dd->port); | ||
2800 | |||
2801 | return 0; | ||
2802 | } | ||
2803 | |||
2804 | /* | ||
2805 | * Resume function | ||
2806 | * | ||
2807 | * This function is called by the Block Layer as the | ||
2808 | * system resumes. | ||
2809 | * | ||
2810 | * @dd Pointer to the driver data structure. | ||
2811 | * | ||
2812 | * return value | ||
2813 | * 0 Resume was successful | ||
2814 | * -EFAULT Resume was not successful | ||
2815 | */ | ||
2816 | static int mtip_hw_resume(struct driver_data *dd) | ||
2817 | { | ||
2818 | /* Perform any needed hardware setup steps */ | ||
2819 | hba_setup(dd); | ||
2820 | |||
2821 | /* Reset the HBA */ | ||
2822 | if (mtip_hba_reset(dd) != 0) { | ||
2823 | dev_err(&dd->pdev->dev, | ||
2824 | "Unable to reset the HBA\n"); | ||
2825 | return -EFAULT; | ||
2826 | } | ||
2827 | |||
2828 | /* | ||
2829 | * Enable the port, DMA engine, and FIS reception specific | ||
2830 | * h/w in controller. | ||
2831 | */ | ||
2832 | mtip_init_port(dd->port); | ||
2833 | mtip_start_port(dd->port); | ||
2834 | |||
2835 | /* Enable interrupts on the HBA.*/ | ||
2836 | writel(readl(dd->mmio + HOST_CTL) | HOST_IRQ_EN, | ||
2837 | dd->mmio + HOST_CTL); | ||
2838 | |||
2839 | return 0; | ||
2840 | } | ||
2841 | |||
2842 | /* | ||
2843 | * Helper function for reusing disk name | ||
2844 | * upon hot insertion. | ||
2845 | */ | ||
2846 | static int rssd_disk_name_format(char *prefix, | ||
2847 | int index, | ||
2848 | char *buf, | ||
2849 | int buflen) | ||
2850 | { | ||
2851 | const int base = 'z' - 'a' + 1; | ||
2852 | char *begin = buf + strlen(prefix); | ||
2853 | char *end = buf + buflen; | ||
2854 | char *p; | ||
2855 | int unit; | ||
2856 | |||
2857 | p = end - 1; | ||
2858 | *p = '\0'; | ||
2859 | unit = base; | ||
2860 | do { | ||
2861 | if (p == begin) | ||
2862 | return -EINVAL; | ||
2863 | *--p = 'a' + (index % unit); | ||
2864 | index = (index / unit) - 1; | ||
2865 | } while (index >= 0); | ||
2866 | |||
2867 | memmove(begin, p, end - p); | ||
2868 | memcpy(buf, prefix, strlen(prefix)); | ||
2869 | |||
2870 | return 0; | ||
2871 | } | ||
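The helper emits sd.c-style base-26 suffixes: the least significant letter is computed first, and the "- 1" makes the scheme carry like spreadsheet columns. A hypothetical check of a few indices:

	char name[DISK_NAME_LEN];

	rssd_disk_name_format("rssd", 0, name, sizeof(name));	/* "rssda"  */
	rssd_disk_name_format("rssd", 25, name, sizeof(name));	/* "rssdz"  */
	rssd_disk_name_format("rssd", 26, name, sizeof(name));	/* "rssdaa" */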
2872 | |||
2873 | /* | ||
2874 | * Block layer IOCTL handler. | ||
2875 | * | ||
2876 | * @dev Pointer to the block_device structure. | ||
2877 | * @mode ignored | ||
2878 | * @cmd IOCTL command passed from the user application. | ||
2879 | * @arg Argument passed from the user application. | ||
2880 | * | ||
2881 | * return value | ||
2882 | * 0 IOCTL completed successfully. | ||
2883 | * -ENOTTY IOCTL not supported or invalid driver data | ||
2884 | * structure pointer. | ||
2885 | */ | ||
2886 | static int mtip_block_ioctl(struct block_device *dev, | ||
2887 | fmode_t mode, | ||
2888 | unsigned cmd, | ||
2889 | unsigned long arg) | ||
2890 | { | ||
2891 | struct driver_data *dd = dev->bd_disk->private_data; | ||
2892 | |||
2893 | if (!capable(CAP_SYS_ADMIN)) | ||
2894 | return -EACCES; | ||
2895 | |||
2896 | if (!dd) | ||
2897 | return -ENOTTY; | ||
2898 | |||
2899 | switch (cmd) { | ||
2900 | case BLKFLSBUF: | ||
2901 | return -ENOTTY; | ||
2902 | default: | ||
2903 | return mtip_hw_ioctl(dd, cmd, arg); | ||
2904 | } | ||
2905 | } | ||
2906 | |||
2907 | #ifdef CONFIG_COMPAT | ||
2908 | /* | ||
2909 | * Block layer compat IOCTL handler. | ||
2910 | * | ||
2911 | * @dev Pointer to the block_device structure. | ||
2912 | * @mode ignored | ||
2913 | * @cmd IOCTL command passed from the user application. | ||
2914 | * @arg Argument passed from the user application. | ||
2915 | * | ||
2916 | * return value | ||
2917 | * 0 IOCTL completed successfully. | ||
2918 | * -ENOTTY IOCTL not supported or invalid driver data | ||
2919 | * structure pointer. | ||
2920 | */ | ||
2921 | static int mtip_block_compat_ioctl(struct block_device *dev, | ||
2922 | fmode_t mode, | ||
2923 | unsigned cmd, | ||
2924 | unsigned long arg) | ||
2925 | { | ||
2926 | struct driver_data *dd = dev->bd_disk->private_data; | ||
2927 | |||
2928 | if (!capable(CAP_SYS_ADMIN)) | ||
2929 | return -EACCES; | ||
2930 | |||
2931 | if (!dd) | ||
2932 | return -ENOTTY; | ||
2933 | |||
2934 | switch (cmd) { | ||
2935 | case BLKFLSBUF: | ||
2936 | return -ENOTTY; | ||
2937 | case HDIO_DRIVE_TASKFILE: { | ||
2938 | struct mtip_compat_ide_task_request_s __user *compat_req_task; | ||
2939 | ide_task_request_t req_task; | ||
2940 | int compat_tasksize, outtotal, ret; | ||
2941 | |||
2942 | compat_tasksize = | ||
2943 | sizeof(struct mtip_compat_ide_task_request_s); | ||
2944 | |||
2945 | compat_req_task = | ||
2946 | (struct mtip_compat_ide_task_request_s __user *) arg; | ||
2947 | |||
2948 | if (copy_from_user(&req_task, (void __user *) arg, | ||
2949 | compat_tasksize - (2 * sizeof(compat_long_t)))) | ||
2950 | return -EFAULT; | ||
2951 | |||
2952 | if (get_user(req_task.out_size, &compat_req_task->out_size)) | ||
2953 | return -EFAULT; | ||
2954 | |||
2955 | if (get_user(req_task.in_size, &compat_req_task->in_size)) | ||
2956 | return -EFAULT; | ||
2957 | |||
2958 | outtotal = sizeof(struct mtip_compat_ide_task_request_s); | ||
2959 | |||
2960 | ret = exec_drive_taskfile(dd, (void __user *) arg, | ||
2961 | &req_task, outtotal); | ||
2962 | |||
2963 | if (copy_to_user((void __user *) arg, &req_task, | ||
2964 | compat_tasksize - | ||
2965 | (2 * sizeof(compat_long_t)))) | ||
2966 | return -EFAULT; | ||
2967 | |||
2968 | if (put_user(req_task.out_size, &compat_req_task->out_size)) | ||
2969 | return -EFAULT; | ||
2970 | |||
2971 | if (put_user(req_task.in_size, &compat_req_task->in_size)) | ||
2972 | return -EFAULT; | ||
2973 | |||
2974 | return ret; | ||
2975 | } | ||
2976 | default: | ||
2977 | return mtip_hw_ioctl(dd, cmd, arg); | ||
2978 | } | ||
2979 | } | ||
2980 | #endif | ||
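The compat handler copies everything except the two trailing size fields in one block, then moves those with get_user()/put_user(). That implies mtip_compat_ide_task_request_s mirrors ide_task_request_t except for compat-sized size fields; a sketch of the assumed layout (the real definition lives earlier in this file, so this restatement is purely illustrative):

struct mtip_compat_ide_task_request_s {
	__u8		io_ports[8];
	__u8		hob_ports[8];
	ide_reg_valid_t	out_flags;
	ide_reg_valid_t	in_flags;
	int		data_phase;
	int		req_cmd;
	compat_ulong_t	out_size;	/* unsigned long in the native ABI */
	compat_ulong_t	in_size;
};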
2981 | |||
2982 | /* | ||
2983 | * Obtain the geometry of the device. | ||
2984 | * | ||
2985 | * You may think that this function is obsolete, but some applications, | ||
2986 | * fdisk for example, still use CHS values. This function describes the | ||
2987 | * device as having 224 heads and 56 sectors per cylinder. These values | ||
2988 | * are chosen so that each cylinder is aligned on a 4KB boundary (224 * | ||
2989 | * 56 = 12544 sectors = 1568 * 4KB blocks). Since a partition is | ||
2990 | * described in terms of a start and end cylinder, each partition is | ||
2991 | * also 4KB aligned. Non-aligned partitions adversely affect performance. | ||
2992 | * | ||
2993 | * @dev Pointer to the block_device structure. | ||
2994 | * @geo Pointer to a hd_geometry structure. | ||
2995 | * | ||
2996 | * return value | ||
2997 | * 0 Operation completed successfully. | ||
2998 | * -ENOTTY An error occurred while reading the drive capacity. | ||
2999 | */ | ||
3000 | static int mtip_block_getgeo(struct block_device *dev, | ||
3001 | struct hd_geometry *geo) | ||
3002 | { | ||
3003 | struct driver_data *dd = dev->bd_disk->private_data; | ||
3004 | sector_t capacity; | ||
3005 | |||
3006 | if (!dd) | ||
3007 | return -ENOTTY; | ||
3008 | |||
3009 | if (!(mtip_hw_get_capacity(dd, &capacity))) { | ||
3010 | dev_warn(&dd->pdev->dev, | ||
3011 | "Could not get drive capacity.\n"); | ||
3012 | return -ENOTTY; | ||
3013 | } | ||
3014 | |||
3015 | geo->heads = 224; | ||
3016 | geo->sectors = 56; | ||
3017 | sector_div(capacity, (geo->heads * geo->sectors)); | ||
3018 | geo->cylinders = capacity; | ||
3019 | return 0; | ||
3020 | } | ||
3021 | |||
3022 | /* | ||
3023 | * Block device operation function. | ||
3024 | * | ||
3025 | * This structure contains pointers to the functions required by the block | ||
3026 | * layer. | ||
3027 | */ | ||
3028 | static const struct block_device_operations mtip_block_ops = { | ||
3029 | .ioctl = mtip_block_ioctl, | ||
3030 | #ifdef CONFIG_COMPAT | ||
3031 | .compat_ioctl = mtip_block_compat_ioctl, | ||
3032 | #endif | ||
3033 | .getgeo = mtip_block_getgeo, | ||
3034 | .owner = THIS_MODULE | ||
3035 | }; | ||
3036 | |||
3037 | /* | ||
3038 | * Block layer make request function. | ||
3039 | * | ||
3040 | * This function is called by the kernel to process a BIO for | ||
3041 | * the P320 device. | ||
3042 | * | ||
3043 | * @queue Pointer to the request queue. Unused other than to obtain | ||
3044 | * the driver data structure. | ||
3045 | * @bio Pointer to the BIO. | ||
3046 | * | ||
3047 | */ | ||
3048 | static void mtip_make_request(struct request_queue *queue, struct bio *bio) | ||
3049 | { | ||
3050 | struct driver_data *dd = queue->queuedata; | ||
3051 | struct scatterlist *sg; | ||
3052 | struct bio_vec *bvec; | ||
3053 | int nents = 0; | ||
3054 | int tag = 0; | ||
3055 | |||
3056 | if (unlikely(!bio_has_data(bio))) { | ||
3057 | blk_queue_flush(queue, 0); | ||
3058 | bio_endio(bio, 0); | ||
3059 | return; | ||
3060 | } | ||
3061 | |||
3062 | sg = mtip_hw_get_scatterlist(dd, &tag); | ||
3063 | if (likely(sg != NULL)) { | ||
3064 | blk_queue_bounce(queue, &bio); | ||
3065 | |||
3066 | if (unlikely((bio)->bi_vcnt > MTIP_MAX_SG)) { | ||
3067 | dev_warn(&dd->pdev->dev, | ||
3068 | "Maximum number of SGL entries exceeded"); | ||
3069 | bio_io_error(bio); | ||
3070 | mtip_hw_release_scatterlist(dd, tag); | ||
3071 | return; | ||
3072 | } | ||
3073 | |||
3074 | /* Create the scatter list for this bio. */ | ||
3075 | bio_for_each_segment(bvec, bio, nents) { | ||
3076 | sg_set_page(&sg[nents], | ||
3077 | bvec->bv_page, | ||
3078 | bvec->bv_len, | ||
3079 | bvec->bv_offset); | ||
3080 | } | ||
3081 | |||
3082 | /* Issue the read/write. */ | ||
3083 | mtip_hw_submit_io(dd, | ||
3084 | bio->bi_sector, | ||
3085 | bio_sectors(bio), | ||
3086 | nents, | ||
3087 | tag, | ||
3088 | bio_endio, | ||
3089 | bio, | ||
3090 | bio->bi_rw & REQ_FUA, | ||
3091 | bio_data_dir(bio)); | ||
3092 | } else | ||
3093 | bio_io_error(bio); | ||
3094 | } | ||
3095 | |||
3096 | /* | ||
3097 | * Block layer initialization function. | ||
3098 | * | ||
3099 | * This function is called once by the PCI layer for each P320 | ||
3100 | * device that is connected to the system. | ||
3101 | * | ||
3102 | * @dd Pointer to the driver data structure. | ||
3103 | * | ||
3104 | * return value | ||
3105 | * 0 on success else an error code. | ||
3106 | */ | ||
3107 | static int mtip_block_initialize(struct driver_data *dd) | ||
3108 | { | ||
3109 | int rv = 0, wait_for_rebuild = 0; | ||
3110 | sector_t capacity; | ||
3111 | unsigned int index = 0; | ||
3112 | struct kobject *kobj; | ||
3113 | unsigned char thd_name[16]; | ||
3114 | |||
3115 | if (dd->disk) | ||
3116 | goto skip_create_disk; /* hw init done, before rebuild */ | ||
3117 | |||
3118 | /* Initialize the protocol layer. */ | ||
3119 | wait_for_rebuild = mtip_hw_init(dd); | ||
3120 | if (wait_for_rebuild < 0) { | ||
3121 | dev_err(&dd->pdev->dev, | ||
3122 | "Protocol layer initialization failed\n"); | ||
3123 | rv = -EINVAL; | ||
3124 | goto protocol_init_error; | ||
3125 | } | ||
3126 | |||
3127 | dd->disk = alloc_disk(MTIP_MAX_MINORS); | ||
3128 | if (dd->disk == NULL) { | ||
3129 | dev_err(&dd->pdev->dev, | ||
3130 | "Unable to allocate gendisk structure\n"); | ||
3131 | rv = -EINVAL; | ||
3132 | goto alloc_disk_error; | ||
3133 | } | ||
3134 | |||
3135 | /* Generate the disk name, implemented the same way as in sd.c */ | ||
3136 | do { | ||
3137 | if (!ida_pre_get(&rssd_index_ida, GFP_KERNEL)) | ||
3138 | goto ida_get_error; | ||
3139 | |||
3140 | spin_lock(&rssd_index_lock); | ||
3141 | rv = ida_get_new(&rssd_index_ida, &index); | ||
3142 | spin_unlock(&rssd_index_lock); | ||
3143 | } while (rv == -EAGAIN); | ||
3144 | |||
3145 | if (rv) | ||
3146 | goto ida_get_error; | ||
3147 | |||
3148 | rv = rssd_disk_name_format("rssd", | ||
3149 | index, | ||
3150 | dd->disk->disk_name, | ||
3151 | DISK_NAME_LEN); | ||
3152 | if (rv) | ||
3153 | goto disk_index_error; | ||
3154 | |||
3155 | dd->disk->driverfs_dev = &dd->pdev->dev; | ||
3156 | dd->disk->major = dd->major; | ||
3157 | dd->disk->first_minor = dd->instance * MTIP_MAX_MINORS; | ||
3158 | dd->disk->fops = &mtip_block_ops; | ||
3159 | dd->disk->private_data = dd; | ||
3160 | dd->index = index; | ||
3161 | |||
3162 | /* | ||
3163 | * if rebuild pending, start the service thread, and delay the block | ||
3164 | * queue creation and add_disk() | ||
3165 | */ | ||
3166 | if (wait_for_rebuild == MTIP_FTL_REBUILD_MAGIC) | ||
3167 | goto start_service_thread; | ||
3168 | |||
3169 | skip_create_disk: | ||
3170 | /* Allocate the request queue. */ | ||
3171 | dd->queue = blk_alloc_queue(GFP_KERNEL); | ||
3172 | if (dd->queue == NULL) { | ||
3173 | dev_err(&dd->pdev->dev, | ||
3174 | "Unable to allocate request queue\n"); | ||
3175 | rv = -ENOMEM; | ||
3176 | goto block_queue_alloc_init_error; | ||
3177 | } | ||
3178 | |||
3179 | /* Attach our request function to the request queue. */ | ||
3180 | blk_queue_make_request(dd->queue, mtip_make_request); | ||
3181 | |||
3182 | dd->disk->queue = dd->queue; | ||
3183 | dd->queue->queuedata = dd; | ||
3184 | |||
3185 | /* Set device limits. */ | ||
3186 | set_bit(QUEUE_FLAG_NONROT, &dd->queue->queue_flags); | ||
3187 | blk_queue_max_segments(dd->queue, MTIP_MAX_SG); | ||
3188 | blk_queue_physical_block_size(dd->queue, 4096); | ||
3189 | blk_queue_io_min(dd->queue, 4096); | ||
3190 | blk_queue_flush(dd->queue, 0); | ||
3191 | |||
3192 | /* Set the capacity of the device in 512 byte sectors. */ | ||
3193 | if (!(mtip_hw_get_capacity(dd, &capacity))) { | ||
3194 | dev_warn(&dd->pdev->dev, | ||
3195 | "Could not read drive capacity\n"); | ||
3196 | rv = -EIO; | ||
3197 | goto read_capacity_error; | ||
3198 | } | ||
3199 | set_capacity(dd->disk, capacity); | ||
3200 | |||
3201 | /* Enable the block device and add it to /dev */ | ||
3202 | add_disk(dd->disk); | ||
3203 | |||
3204 | /* | ||
3205 | * Now that the disk is active, initialize any sysfs attributes | ||
3206 | * managed by the protocol layer. | ||
3207 | */ | ||
3208 | kobj = kobject_get(&disk_to_dev(dd->disk)->kobj); | ||
3209 | if (kobj) { | ||
3210 | mtip_hw_sysfs_init(dd, kobj); | ||
3211 | kobject_put(kobj); | ||
3212 | } | ||
3213 | |||
3214 | if (dd->mtip_svc_handler) | ||
3215 | return rv; /* service thread created for handling rebuild */ | ||
3216 | |||
3217 | start_service_thread: | ||
3218 | sprintf(thd_name, "mtip_svc_thd_%02d", index); | ||
3219 | |||
3220 | dd->mtip_svc_handler = kthread_run(mtip_service_thread, | ||
3221 | dd, thd_name); | ||
3222 | |||
3223 | if (IS_ERR(dd->mtip_svc_handler)) { | ||
3224 | printk(KERN_ERR "mtip32xx: service thread failed to start\n"); | ||
3225 | dd->mtip_svc_handler = NULL; | ||
3226 | rv = -EFAULT; | ||
3227 | goto kthread_run_error; | ||
3228 | } | ||
3229 | |||
3230 | return rv; | ||
3231 | |||
3232 | kthread_run_error: | ||
3233 | /* Delete our gendisk. This also removes the device from /dev */ | ||
3234 | del_gendisk(dd->disk); | ||
3235 | |||
3236 | read_capacity_error: | ||
3237 | blk_cleanup_queue(dd->queue); | ||
3238 | |||
3239 | block_queue_alloc_init_error: | ||
3240 | disk_index_error: | ||
3241 | spin_lock(&rssd_index_lock); | ||
3242 | ida_remove(&rssd_index_ida, index); | ||
3243 | spin_unlock(&rssd_index_lock); | ||
3244 | |||
3245 | ida_get_error: | ||
3246 | put_disk(dd->disk); | ||
3247 | |||
3248 | alloc_disk_error: | ||
3249 | mtip_hw_exit(dd); /* De-initialize the protocol layer. */ | ||
3250 | |||
3251 | protocol_init_error: | ||
3252 | return rv; | ||
3253 | } | ||
3254 | |||
3255 | /* | ||
3256 | * Block layer deinitialization function. | ||
3257 | * | ||
3258 | * Called by the PCI layer as each P320 device is removed. | ||
3259 | * | ||
3260 | * @dd Pointer to the driver data structure. | ||
3261 | * | ||
3262 | * return value | ||
3263 | * 0 | ||
3264 | */ | ||
3265 | static int mtip_block_remove(struct driver_data *dd) | ||
3266 | { | ||
3267 | struct kobject *kobj; | ||
3268 | |||
3269 | if (dd->mtip_svc_handler) { | ||
3270 | set_bit(MTIP_FLAG_SVC_THD_SHOULD_STOP_BIT, &dd->port->flags); | ||
3271 | wake_up_interruptible(&dd->port->svc_wait); | ||
3272 | kthread_stop(dd->mtip_svc_handler); | ||
3273 | } | ||
3274 | |||
3275 | /* Clean up the sysfs attributes managed by the protocol layer. */ | ||
3276 | kobj = kobject_get(&disk_to_dev(dd->disk)->kobj); | ||
3277 | if (kobj) { | ||
3278 | mtip_hw_sysfs_exit(dd, kobj); | ||
3279 | kobject_put(kobj); | ||
3280 | } | ||
3281 | |||
3282 | /* | ||
3283 | * Delete our gendisk structure. This also removes the device | ||
3284 | * from /dev | ||
3285 | */ | ||
3286 | del_gendisk(dd->disk); | ||
3287 | blk_cleanup_queue(dd->queue); | ||
3288 | dd->disk = NULL; | ||
3289 | dd->queue = NULL; | ||
3290 | |||
3291 | /* De-initialize the protocol layer. */ | ||
3292 | mtip_hw_exit(dd); | ||
3293 | |||
3294 | return 0; | ||
3295 | } | ||
3296 | |||
3297 | /* | ||
3298 | * Function called by the PCI layer just before the | ||
3299 | * machine shuts down. | ||
3300 | * | ||
3301 | * If a protocol layer shutdown function is present it will be called | ||
3302 | * by this function. | ||
3303 | * | ||
3304 | * @dd Pointer to the driver data structure. | ||
3305 | * | ||
3306 | * return value | ||
3307 | * 0 | ||
3308 | */ | ||
3309 | static int mtip_block_shutdown(struct driver_data *dd) | ||
3310 | { | ||
3311 | dev_info(&dd->pdev->dev, | ||
3312 | "Shutting down %s ...\n", dd->disk->disk_name); | ||
3313 | |||
3314 | /* Delete our gendisk structure, and cleanup the blk queue. */ | ||
3315 | del_gendisk(dd->disk); | ||
3316 | blk_cleanup_queue(dd->queue); | ||
3317 | dd->disk = NULL; | ||
3318 | dd->queue = NULL; | ||
3319 | |||
3320 | mtip_hw_shutdown(dd); | ||
3321 | return 0; | ||
3322 | } | ||
3323 | |||
3324 | static int mtip_block_suspend(struct driver_data *dd) | ||
3325 | { | ||
3326 | dev_info(&dd->pdev->dev, | ||
3327 | "Suspending %s ...\n", dd->disk->disk_name); | ||
3328 | mtip_hw_suspend(dd); | ||
3329 | return 0; | ||
3330 | } | ||
3331 | |||
3332 | static int mtip_block_resume(struct driver_data *dd) | ||
3333 | { | ||
3334 | dev_info(&dd->pdev->dev, "Resuming %s ...\n", | ||
3335 | dd->disk->disk_name); | ||
3336 | mtip_hw_resume(dd); | ||
3337 | return 0; | ||
3338 | } | ||
3339 | |||
3340 | /* | ||
3341 | * Called for each supported PCI device detected. | ||
3342 | * | ||
3343 | * This function allocates the private data structure, enables the | ||
3344 | * PCI device and then calls the block layer initialization function. | ||
3345 | * | ||
3346 | * return value | ||
3347 | * 0 on success else an error code. | ||
3348 | */ | ||
3349 | static int mtip_pci_probe(struct pci_dev *pdev, | ||
3350 | const struct pci_device_id *ent) | ||
3351 | { | ||
3352 | int rv = 0; | ||
3353 | struct driver_data *dd = NULL; | ||
3354 | |||
3355 | /* Allocate memory for this device's private data. */ | ||
3356 | dd = kzalloc(sizeof(struct driver_data), GFP_KERNEL); | ||
3357 | if (dd == NULL) { | ||
3358 | dev_err(&pdev->dev, | ||
3359 | "Unable to allocate memory for driver data\n"); | ||
3360 | return -ENOMEM; | ||
3361 | } | ||
3362 | |||
3363 | /* Set the atomic variable as 1 in case of SRSI */ | ||
3364 | atomic_set(&dd->drv_cleanup_done, true); | ||
3365 | |||
3366 | atomic_set(&dd->resumeflag, false); | ||
3367 | |||
3368 | /* Attach the private data to this PCI device. */ | ||
3369 | pci_set_drvdata(pdev, dd); | ||
3370 | |||
3371 | rv = pcim_enable_device(pdev); | ||
3372 | if (rv < 0) { | ||
3373 | dev_err(&pdev->dev, "Unable to enable device\n"); | ||
3374 | goto iomap_err; | ||
3375 | } | ||
3376 | |||
3377 | /* Map BAR5 to memory. */ | ||
3378 | rv = pcim_iomap_regions(pdev, 1 << MTIP_ABAR, MTIP_DRV_NAME); | ||
3379 | if (rv < 0) { | ||
3380 | dev_err(&pdev->dev, "Unable to map regions\n"); | ||
3381 | goto iomap_err; | ||
3382 | } | ||
3383 | |||
3384 | if (!pci_set_dma_mask(pdev, DMA_BIT_MASK(64))) { | ||
3385 | rv = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64)); | ||
3386 | |||
3387 | if (rv) { | ||
3388 | rv = pci_set_consistent_dma_mask(pdev, | ||
3389 | DMA_BIT_MASK(32)); | ||
3390 | if (rv) { | ||
3391 | dev_warn(&pdev->dev, | ||
3392 | "64-bit DMA enable failed\n"); | ||
3393 | goto setmask_err; | ||
3394 | } | ||
3395 | } | ||
3396 | } | ||
3397 | |||
3398 | pci_set_master(pdev); | ||
3399 | |||
3400 | if (pci_enable_msi(pdev)) { | ||
3401 | dev_warn(&pdev->dev, | ||
3402 | "Unable to enable MSI interrupt.\n"); | ||
3403 | goto block_initialize_err; | ||
3404 | } | ||
3405 | |||
3406 | /* Copy the info we may need later into the private data structure. */ | ||
3407 | dd->major = mtip_major; | ||
3408 | dd->instance = instance; | ||
3409 | dd->pdev = pdev; | ||
3410 | |||
3411 | /* Initialize the block layer. */ | ||
3412 | rv = mtip_block_initialize(dd); | ||
3413 | if (rv < 0) { | ||
3414 | dev_err(&pdev->dev, | ||
3415 | "Unable to initialize block layer\n"); | ||
3416 | goto block_initialize_err; | ||
3417 | } | ||
3418 | |||
3419 | /* | ||
3420 | * Increment the instance count so that each device has a unique | ||
3421 | * instance number. | ||
3422 | */ | ||
3423 | instance++; | ||
3424 | |||
3425 | goto done; | ||
3426 | |||
3427 | block_initialize_err: | ||
3428 | pci_disable_msi(pdev); | ||
3429 | |||
3430 | setmask_err: | ||
3431 | pcim_iounmap_regions(pdev, 1 << MTIP_ABAR); | ||
3432 | |||
3433 | iomap_err: | ||
3434 | kfree(dd); | ||
3435 | pci_set_drvdata(pdev, NULL); | ||
3436 | return rv; | ||
3437 | done: | ||
3438 | /* Set the atomic variable as 0 in case of SRSI */ | ||
3439 | atomic_set(&dd->drv_cleanup_done, false); | ||
3440 | |||
3441 | return rv; | ||
3442 | } | ||
3443 | |||
3444 | /* | ||
3445 | * Called for each probed device when the device is removed or the | ||
3446 | * driver is unloaded. | ||
3447 | * | ||
3448 | * return value | ||
3449 | * None | ||
3450 | */ | ||
3451 | static void mtip_pci_remove(struct pci_dev *pdev) | ||
3452 | { | ||
3453 | struct driver_data *dd = pci_get_drvdata(pdev); | ||
3454 | int counter = 0; | ||
3455 | |||
3456 | if (mtip_check_surprise_removal(pdev)) { | ||
3457 | while (atomic_read(&dd->drv_cleanup_done) == false) { | ||
3458 | counter++; | ||
3459 | msleep(20); | ||
3460 | if (counter == 10) { | ||
3461 | /* Cleanup the outstanding commands */ | ||
3462 | mtip_command_cleanup(dd); | ||
3463 | break; | ||
3464 | } | ||
3465 | } | ||
3466 | } | ||
3467 | /* Set the atomic variable as 1 in case of SRSI */ | ||
3468 | atomic_set(&dd->drv_cleanup_done, true); | ||
3469 | |||
3470 | /* Clean up the block layer. */ | ||
3471 | mtip_block_remove(dd); | ||
3472 | |||
3473 | pci_disable_msi(pdev); | ||
3474 | |||
3475 | kfree(dd); | ||
3476 | pcim_iounmap_regions(pdev, 1 << MTIP_ABAR); | ||
3477 | } | ||
3478 | |||
3479 | /* | ||
3480 | * Called for each probed device when the device is suspended. | ||
3481 | * | ||
3482 | * return value | ||
3483 | * 0 Success | ||
3484 | * <0 Error | ||
3485 | */ | ||
3486 | static int mtip_pci_suspend(struct pci_dev *pdev, pm_message_t mesg) | ||
3487 | { | ||
3488 | int rv = 0; | ||
3489 | struct driver_data *dd = pci_get_drvdata(pdev); | ||
3490 | |||
3491 | if (!dd) { | ||
3492 | dev_err(&pdev->dev, | ||
3493 | "Driver private datastructure is NULL\n"); | ||
3494 | return -EFAULT; | ||
3495 | } | ||
3496 | |||
3497 | atomic_set(&dd->resumeflag, true); | ||
3498 | |||
3499 | /* Disable ports & interrupts then send standby immediate */ | ||
3500 | rv = mtip_block_suspend(dd); | ||
3501 | if (rv < 0) { | ||
3502 | dev_err(&pdev->dev, | ||
3503 | "Failed to suspend controller\n"); | ||
3504 | return rv; | ||
3505 | } | ||
3506 | |||
3507 | /* | ||
3508 | * Save the pci config space to pdev structure & | ||
3509 | * disable the device | ||
3510 | */ | ||
3511 | pci_save_state(pdev); | ||
3512 | pci_disable_device(pdev); | ||
3513 | |||
3514 | /* Move to low power state */ | ||
3515 | pci_set_power_state(pdev, PCI_D3hot); | ||
3516 | |||
3517 | return rv; | ||
3518 | } | ||
3519 | |||
3520 | /* | ||
3521 | * Called for each probed device when the device is resumed. | ||
3522 | * | ||
3523 | * return value | ||
3524 | * 0 Success | ||
3525 | * <0 Error | ||
3526 | */ | ||
3527 | static int mtip_pci_resume(struct pci_dev *pdev) | ||
3528 | { | ||
3529 | int rv = 0; | ||
3530 | struct driver_data *dd; | ||
3531 | |||
3532 | dd = pci_get_drvdata(pdev); | ||
3533 | if (!dd) { | ||
3534 | dev_err(&pdev->dev, | ||
3535 | "Driver private datastructure is NULL\n"); | ||
3536 | return -EFAULT; | ||
3537 | } | ||
3538 | |||
3539 | /* Move the device to the active state */ | ||
3540 | pci_set_power_state(pdev, PCI_D0); | ||
3541 | |||
3542 | /* Restore PCI configuration space */ | ||
3543 | pci_restore_state(pdev); | ||
3544 | |||
3545 | /* Enable the PCI device */ | ||
3546 | rv = pcim_enable_device(pdev); | ||
3547 | if (rv < 0) { | ||
3548 | dev_err(&pdev->dev, | ||
3549 | "Failed to enable card during resume\n"); | ||
3550 | goto err; | ||
3551 | } | ||
3552 | pci_set_master(pdev); | ||
3553 | |||
3554 | /* | ||
3555 | * Calls the hbaReset, initPort, & startPort functions, | ||
3556 | * then enables interrupts | ||
3557 | */ | ||
3558 | rv = mtip_block_resume(dd); | ||
3559 | if (rv < 0) | ||
3560 | dev_err(&pdev->dev, "Unable to resume\n"); | ||
3561 | |||
3562 | err: | ||
3563 | atomic_set(&dd->resumeflag, false); | ||
3564 | |||
3565 | return rv; | ||
3566 | } | ||
3567 | |||
3568 | /* | ||
3569 | * Shutdown routine | ||
3570 | * | ||
3571 | * return value | ||
3572 | * None | ||
3573 | */ | ||
3574 | static void mtip_pci_shutdown(struct pci_dev *pdev) | ||
3575 | { | ||
3576 | struct driver_data *dd = pci_get_drvdata(pdev); | ||
3577 | if (dd) | ||
3578 | mtip_block_shutdown(dd); | ||
3579 | } | ||
3580 | |||
3581 | /* Table of device ids supported by this driver. */ | ||
3582 | static DEFINE_PCI_DEVICE_TABLE(mtip_pci_tbl) = { | ||
3583 | { PCI_DEVICE(PCI_VENDOR_ID_MICRON, P320_DEVICE_ID) }, | ||
3584 | { 0 } | ||
3585 | }; | ||
3586 | |||
3587 | /* Structure that describes the PCI driver functions. */ | ||
3588 | static struct pci_driver mtip_pci_driver = { | ||
3589 | .name = MTIP_DRV_NAME, | ||
3590 | .id_table = mtip_pci_tbl, | ||
3591 | .probe = mtip_pci_probe, | ||
3592 | .remove = mtip_pci_remove, | ||
3593 | .suspend = mtip_pci_suspend, | ||
3594 | .resume = mtip_pci_resume, | ||
3595 | .shutdown = mtip_pci_shutdown, | ||
3596 | }; | ||
3597 | |||
3598 | MODULE_DEVICE_TABLE(pci, mtip_pci_tbl); | ||
3599 | |||
3600 | /* | ||
3601 | * Module initialization function. | ||
3602 | * | ||
3603 | * Called once when the module is loaded. This function allocates a major | ||
3604 | * block device number to the Cyclone devices and registers the PCI layer | ||
3605 | * of the driver. | ||
3606 | * | ||
3607 | * Return value | ||
3608 | * 0 on success else error code. | ||
3609 | */ | ||
3610 | static int __init mtip_init(void) | ||
3611 | { | ||
3612 | printk(KERN_INFO MTIP_DRV_NAME " Version " MTIP_DRV_VERSION "\n"); | ||
3613 | |||
3614 | /* Allocate a major block device number to use with this driver. */ | ||
3615 | mtip_major = register_blkdev(0, MTIP_DRV_NAME); | ||
3616 | if (mtip_major < 0) { | ||
3617 | printk(KERN_ERR "Unable to register block device (%d)\n", | ||
3618 | mtip_major); | ||
3619 | return -EBUSY; | ||
3620 | } | ||
3621 | |||
3622 | /* Register our PCI operations. */ | ||
3623 | return pci_register_driver(&mtip_pci_driver); | ||
3624 | } | ||
3625 | |||
3626 | /* | ||
3627 | * Module de-initialization function. | ||
3628 | * | ||
3629 | * Called once when the module is unloaded. This function deallocates | ||
3630 | * the major block device number allocated by mtip_init() and | ||
3631 | * unregisters the PCI layer of the driver. | ||
3632 | * | ||
3633 | * Return value | ||
3634 | * none | ||
3635 | */ | ||
3636 | static void __exit mtip_exit(void) | ||
3637 | { | ||
3638 | /* Release the allocated major block device number. */ | ||
3639 | unregister_blkdev(mtip_major, MTIP_DRV_NAME); | ||
3640 | |||
3641 | /* Unregister the PCI driver. */ | ||
3642 | pci_unregister_driver(&mtip_pci_driver); | ||
3643 | } | ||
3644 | |||
3645 | MODULE_AUTHOR("Micron Technology, Inc"); | ||
3646 | MODULE_DESCRIPTION("Micron RealSSD PCIe Block Driver"); | ||
3647 | MODULE_LICENSE("GPL"); | ||
3648 | MODULE_VERSION(MTIP_DRV_VERSION); | ||
3649 | |||
3650 | module_init(mtip_init); | ||
3651 | module_exit(mtip_exit); | ||
3652 |
drivers/block/mtip32xx/mtip32xx.h
File was created | 1 | /* | |
2 | * mtip32xx.h - Header file for the P320 SSD Block Driver | ||
3 | * Copyright (C) 2011 Micron Technology, Inc. | ||
4 | * | ||
5 | * Portions of this code were derived from works subjected to the | ||
6 | * following copyright: | ||
7 | * Copyright (C) 2009 Integrated Device Technology, Inc. | ||
8 | * | ||
9 | * This program is free software; you can redistribute it and/or modify | ||
10 | * it under the terms of the GNU General Public License as published by | ||
11 | * the Free Software Foundation; either version 2 of the License, or | ||
12 | * (at your option) any later version. | ||
13 | * | ||
14 | * This program is distributed in the hope that it will be useful, | ||
15 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
16 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
17 | * GNU General Public License for more details. | ||
18 | * | ||
19 | */ | ||
20 | |||
21 | #ifndef __MTIP32XX_H__ | ||
22 | #define __MTIP32XX_H__ | ||
23 | |||
24 | #include <linux/spinlock.h> | ||
25 | #include <linux/rwsem.h> | ||
26 | #include <linux/ata.h> | ||
27 | #include <linux/interrupt.h> | ||
28 | #include <linux/genhd.h> | ||
29 | #include <linux/version.h> | ||
30 | |||
31 | /* Offset of Subsystem Device ID in PCI configuration space */ | ||
32 | #define PCI_SUBSYSTEM_DEVICEID 0x2E | ||
33 | |||
34 | /* Offset of Device Control register in PCIe extended capabilities space */ | ||
35 | #define PCIE_CONFIG_EXT_DEVICE_CONTROL_OFFSET 0x48 | ||
36 | |||
37 | /* # of times to retry timed out IOs */ | ||
38 | #define MTIP_MAX_RETRIES 5 | ||
39 | |||
40 | /* Various timeout values in ms */ | ||
41 | #define MTIP_NCQ_COMMAND_TIMEOUT_MS 5000 | ||
42 | #define MTIP_IOCTL_COMMAND_TIMEOUT_MS 5000 | ||
43 | #define MTIP_INTERNAL_COMMAND_TIMEOUT_MS 5000 | ||
44 | |||
45 | /* check for timeouts every 500ms */ | ||
46 | #define MTIP_TIMEOUT_CHECK_PERIOD 500 | ||
47 | |||
48 | /* FTL rebuild */ | ||
49 | #define MTIP_FTL_REBUILD_OFFSET 142 | ||
50 | #define MTIP_FTL_REBUILD_MAGIC 0xED51 | ||
51 | #define MTIP_FTL_REBUILD_TIMEOUT_MS 2400000 | ||
52 | |||
53 | /* Macro to extract the tag bit number from a tag value. */ | ||
54 | #define MTIP_TAG_BIT(tag) ((tag) & 0x1F) | ||
55 | |||
56 | /* | ||
57 | * Macro to extract the tag index from a tag value. The index | ||
58 | * is used to access the correct s_active/Command Issue register based | ||
59 | * on the tag value. | ||
60 | */ | ||
61 | #define MTIP_TAG_INDEX(tag) ((tag) >> 5) | ||
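/*
 * Worked example (editor's illustration, not part of the driver):
 * with 32 tags per slot group, tag 37 decomposes as
 *	MTIP_TAG_INDEX(37) == 37 >> 5   == 1
 *	MTIP_TAG_BIT(37)   == 37 & 0x1F == 5
 * i.e. tag 37 is tracked by bit 5 of the second s_active/Command
 * Issue register.
 */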
62 | |||
63 | /* | ||
64 | * Maximum number of scatter gather entries | ||
65 | * a single command may have. | ||
66 | */ | ||
67 | #define MTIP_MAX_SG 128 | ||
68 | |||
69 | /* | ||
70 | * Maximum number of slot groups (Command Issue & s_active registers) | ||
71 | * NOTE: This is the driver maximum; check dd->slot_groups for actual value. | ||
72 | */ | ||
73 | #define MTIP_MAX_SLOT_GROUPS 8 | ||
74 | |||
75 | /* Internal command tag. */ | ||
76 | #define MTIP_TAG_INTERNAL 0 | ||
77 | |||
78 | /* Micron Vendor ID & P320x SSD Device ID */ | ||
79 | #define PCI_VENDOR_ID_MICRON 0x1344 | ||
80 | #define P320_DEVICE_ID 0x5150 | ||
81 | |||
82 | /* Driver name and version strings */ | ||
83 | #define MTIP_DRV_NAME "mtip32xx" | ||
84 | #define MTIP_DRV_VERSION "1.2.6os3" | ||
85 | |||
86 | /* Maximum number of minor device numbers per device. */ | ||
87 | #define MTIP_MAX_MINORS 16 | ||
88 | |||
89 | /* Maximum number of supported command slots. */ | ||
90 | #define MTIP_MAX_COMMAND_SLOTS (MTIP_MAX_SLOT_GROUPS * 32) | ||
91 | |||
92 | /* | ||
93 | * Per-tag bitfield size in longs. | ||
94 | * Linux bit manipulation functions | ||
95 | * (e.g. test_and_set_bit, find_next_zero_bit) | ||
96 | * manipulate memory in longs, so we try to make the math work. | ||
97 | * Take the slot groups and find the number of longs, rounding up. | ||
98 | * Careful! i386 and x86_64 use different-sized longs! | ||
99 | */ | ||
100 | #define U32_PER_LONG (sizeof(long) / sizeof(u32)) | ||
101 | #define SLOTBITS_IN_LONGS ((MTIP_MAX_SLOT_GROUPS + \ | ||
102 | (U32_PER_LONG-1))/U32_PER_LONG) | ||
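/*
 * Worked example (editor's illustration, not part of the driver):
 * on x86_64, sizeof(long) == 8, so U32_PER_LONG == 2 and
 * SLOTBITS_IN_LONGS == (8 + 1) / 2 == 4 longs of 64 bits; on i386,
 * U32_PER_LONG == 1 and SLOTBITS_IN_LONGS == 8 longs of 32 bits.
 * Either way the bitfield covers all 8 * 32 == 256 possible tags.
 */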
103 | |||
104 | /* BAR number used to access the HBA registers. */ | ||
105 | #define MTIP_ABAR 5 | ||
106 | |||
107 | /* Forced Unit Access Bit */ | ||
108 | #define FUA_BIT 0x80 | ||
109 | |||
110 | #ifdef DEBUG | ||
111 | #define dbg_printk(format, arg...) \ | ||
112 | printk(pr_fmt(format), ##arg) | ||
113 | #else | ||
114 | #define dbg_printk(format, arg...) | ||
115 | #endif | ||
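/*
 * Usage note (editor's illustration): a call such as
 *	dbg_printk("command timeout on tag %d\n", tag);
 * prints only when the driver is built with DEBUG defined; otherwise
 * dbg_printk() expands to nothing and the statement is compiled out.
 */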
116 | |||
117 | #define __force_bit2int (unsigned int __force) | ||
118 | |||
119 | /* Below are bit numbers in 'flags' defined in struct mtip_port */ | ||
120 | #define MTIP_FLAG_IC_ACTIVE_BIT 0 | ||
121 | #define MTIP_FLAG_EH_ACTIVE_BIT 1 | ||
122 | #define MTIP_FLAG_SVC_THD_ACTIVE_BIT 2 | ||
123 | #define MTIP_FLAG_ISSUE_CMDS_BIT 4 | ||
124 | #define MTIP_FLAG_REBUILD_BIT 5 | ||
125 | #define MTIP_FLAG_SVC_THD_SHOULD_STOP_BIT 8 | ||
126 | |||
127 | /* Register Frame Information Structure (FIS), host to device. */ | ||
128 | struct host_to_dev_fis { | ||
129 | /* | ||
130 | * FIS type. | ||
131 | * - 27h Register FIS, host to device. | ||
132 | * - 34h Register FIS, device to host. | ||
133 | * - 39h DMA Activate FIS, device to host. | ||
134 | * - 41h DMA Setup FIS, bi-directional. | ||
135 | * - 46h Data FIS, bi-directional. | ||
136 | * - 58h BIST Activate FIS, bi-directional. | ||
137 | * - 5Fh PIO Setup FIS, device to host. | ||
138 | * - A1h Set Device Bits FIS, device to host. | ||
139 | */ | ||
140 | unsigned char type; | ||
141 | unsigned char opts; | ||
142 | unsigned char command; | ||
143 | unsigned char features; | ||
144 | |||
145 | union { | ||
146 | unsigned char lba_low; | ||
147 | unsigned char sector; | ||
148 | }; | ||
149 | union { | ||
150 | unsigned char lba_mid; | ||
151 | unsigned char cyl_low; | ||
152 | }; | ||
153 | union { | ||
154 | unsigned char lba_hi; | ||
155 | unsigned char cyl_hi; | ||
156 | }; | ||
157 | union { | ||
158 | unsigned char device; | ||
159 | unsigned char head; | ||
160 | }; | ||
161 | |||
162 | union { | ||
163 | unsigned char lba_low_ex; | ||
164 | unsigned char sector_ex; | ||
165 | }; | ||
166 | union { | ||
167 | unsigned char lba_mid_ex; | ||
168 | unsigned char cyl_low_ex; | ||
169 | }; | ||
170 | union { | ||
171 | unsigned char lba_hi_ex; | ||
172 | unsigned char cyl_hi_ex; | ||
173 | }; | ||
174 | unsigned char features_ex; | ||
175 | |||
176 | unsigned char sect_count; | ||
177 | unsigned char sect_cnt_ex; | ||
178 | unsigned char res2; | ||
179 | unsigned char control; | ||
180 | |||
181 | unsigned int res3; | ||
182 | }; | ||
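/*
 * Editor's illustration (hypothetical helper, not part of the driver):
 * filling a minimal register FIS for an ATA IDENTIFY DEVICE command.
 * The bit-7 "command" flag in 'opts' follows the Serial ATA spec.
 */
static inline void example_fill_identify_fis(struct host_to_dev_fis *fis)
{
	memset(fis, 0, sizeof(*fis));
	fis->type    = 0x27;		/* Register FIS, host to device */
	fis->opts    = 1 << 7;		/* This FIS carries a command */
	fis->command = ATA_CMD_ID_ATA;	/* 0xEC, IDENTIFY DEVICE */
}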
183 | |||
184 | /* Command header structure. */ | ||
185 | struct mtip_cmd_hdr { | ||
186 | /* | ||
187 | * Command options. | ||
188 | * - Bits 31:16 Number of PRD entries. | ||
189 | * - Bits 15:8 Unused in this implementation. | ||
190 | * - Bit 7 Prefetch bit, informs the drive to prefetch PRD entries. | ||
191 | * - Bit 6 Write bit, should be set when writing data to the device. | ||
192 | * - Bit 5 Unused in this implementation. | ||
193 | * - Bits 4:0 Length of the command FIS in DWords (DWord = 4 bytes). | ||
194 | */ | ||
195 | unsigned int opts; | ||
196 | /* This field is unused when using NCQ. */ | ||
197 | union { | ||
198 | unsigned int byte_count; | ||
199 | unsigned int status; | ||
200 | }; | ||
201 | /* | ||
202 | * Lower 32 bits of the command table address associated with this | ||
203 | * header. The command table addresses must be 128 byte aligned. | ||
204 | */ | ||
205 | unsigned int ctba; | ||
206 | /* | ||
207 | * If 64 bit addressing is used this field is the upper 32 bits | ||
208 | * of the command table address associated with this command. | ||
209 | */ | ||
210 | unsigned int ctbau; | ||
211 | /* Reserved and unused. */ | ||
212 | unsigned int res[4]; | ||
213 | }; | ||
214 | |||
215 | /* Command scatter gather structure (PRD). */ | ||
216 | struct mtip_cmd_sg { | ||
217 | /* | ||
218 | * Low 32 bits of the data buffer address. For P320 this | ||
219 | * address must be 8-byte aligned, signified by bits 2:0 being | ||
220 | * set to 0. | ||
221 | */ | ||
222 | unsigned int dba; | ||
223 | /* | ||
224 | * When 64 bit addressing is used this field is the upper | ||
225 | * 32 bits of the data buffer address. | ||
226 | */ | ||
227 | unsigned int dba_upper; | ||
228 | /* Unused. */ | ||
229 | unsigned int reserved; | ||
230 | /* | ||
231 | * Bit 31: interrupt when this data block has been transferred. | ||
232 | * Bits 30..22: reserved | ||
233 | * Bits 21..0: byte count (minus 1). For P320 the byte count must be | ||
234 | * 8-byte aligned, signified by bits 2:0 being set to 1. | ||
235 | */ | ||
236 | unsigned int info; | ||
237 | }; | ||
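/*
 * Editor's illustration (hypothetical helper, not part of the driver):
 * packing a PRD 'info' field per the layout above. 'nbytes' is assumed
 * to be a non-zero multiple of 8, as the hardware requires.
 */
static inline unsigned int mtip_prd_info(unsigned int nbytes, int irq_when_done)
{
	/* Bit 31: completion interrupt; bits 21..0: byte count minus 1. */
	return (irq_when_done ? (1U << 31) : 0) | ((nbytes - 1) & 0x3FFFFF);
}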
238 | struct mtip_port; | ||
239 | |||
240 | /* Structure used to describe a command. */ | ||
241 | struct mtip_cmd { | ||
242 | |||
243 | struct mtip_cmd_hdr *command_header; /* ptr to command header entry */ | ||
244 | |||
245 | dma_addr_t command_header_dma; /* corresponding physical address */ | ||
246 | |||
247 | void *command; /* ptr to command table entry */ | ||
248 | |||
249 | dma_addr_t command_dma; /* corresponding physical address */ | ||
250 | |||
251 | void *comp_data; /* data passed to completion function comp_func() */ | ||
252 | /* | ||
253 | * Completion function called by the ISR upon completion of | ||
254 | * a command. | ||
255 | */ | ||
256 | void (*comp_func)(struct mtip_port *port, | ||
257 | int tag, | ||
258 | void *data, | ||
259 | int status); | ||
260 | /* Additional callback function that may be called by comp_func() */ | ||
261 | void (*async_callback)(void *data, int status); | ||
262 | |||
263 | void *async_data; /* Addl. data passed to async_callback() */ | ||
264 | |||
265 | int scatter_ents; /* Number of scatter list entries used */ | ||
266 | |||
267 | struct scatterlist sg[MTIP_MAX_SG]; /* Scatter list entries */ | ||
268 | |||
269 | int retries; /* The number of retries left for this command. */ | ||
270 | |||
271 | int direction; /* Data transfer direction */ | ||
272 | |||
273 | unsigned long comp_time; /* command completion time, in jiffies */ | ||
274 | |||
275 | atomic_t active; /* declares if this command was sent to the drive. */ | ||
276 | }; | ||
277 | |||
278 | /* Structure used to describe a port. */ | ||
279 | struct mtip_port { | ||
280 | /* Pointer back to the driver data for this port. */ | ||
281 | struct driver_data *dd; | ||
282 | /* | ||
283 | * Used to determine if the data pointed to by the | ||
284 | * identify field is valid. | ||
285 | */ | ||
286 | unsigned long identify_valid; | ||
287 | /* Base address of the memory mapped IO for the port. */ | ||
288 | void __iomem *mmio; | ||
289 | /* Array of pointers to the memory mapped s_active registers. */ | ||
290 | void __iomem *s_active[MTIP_MAX_SLOT_GROUPS]; | ||
291 | /* Array of pointers to the memory mapped completed registers. */ | ||
292 | void __iomem *completed[MTIP_MAX_SLOT_GROUPS]; | ||
293 | /* Array of pointers to the memory mapped Command Issue registers. */ | ||
294 | void __iomem *cmd_issue[MTIP_MAX_SLOT_GROUPS]; | ||
295 | /* | ||
296 | * Pointer to the beginning of the command header memory as used | ||
297 | * by the driver. | ||
298 | */ | ||
299 | void *command_list; | ||
300 | /* | ||
301 | * Pointer to the beginning of the command header memory as used | ||
302 | * by the DMA. | ||
303 | */ | ||
304 | dma_addr_t command_list_dma; | ||
305 | /* | ||
306 | * Pointer to the beginning of the RX FIS memory as used | ||
307 | * by the driver. | ||
308 | */ | ||
309 | void *rxfis; | ||
310 | /* | ||
311 | * Pointer to the beginning of the RX FIS memory as used | ||
312 | * by the DMA. | ||
313 | */ | ||
314 | dma_addr_t rxfis_dma; | ||
315 | /* | ||
316 | * Pointer to the beginning of the command table memory as used | ||
317 | * by the driver. | ||
318 | */ | ||
319 | void *command_table; | ||
320 | /* | ||
321 | * Pointer to the beginning of the command table memory as used | ||
322 | * by the DMA. | ||
323 | */ | ||
324 | dma_addr_t command_tbl_dma; | ||
325 | /* | ||
326 | * Pointer to the beginning of the identify data memory as used | ||
327 | * by the driver. | ||
328 | */ | ||
329 | u16 *identify; | ||
330 | /* | ||
331 | * Pointer to the beginning of the identify data memory as used | ||
332 | * by the DMA. | ||
333 | */ | ||
334 | dma_addr_t identify_dma; | ||
335 | /* | ||
336 | * Pointer to the beginning of a sector buffer that is used | ||
337 | * by the driver when issuing internal commands. | ||
338 | */ | ||
339 | u16 *sector_buffer; | ||
340 | /* | ||
341 | * Pointer to the beginning of a sector buffer that is used | ||
342 | * by the DMA when the driver issues internal commands. | ||
343 | */ | ||
344 | dma_addr_t sector_buffer_dma; | ||
345 | /* | ||
346 | * Bit significant, used to determine if a command slot has | ||
347 | * been allocated, i.e. the slot is in use. Bits are cleared | ||
348 | * when the command slot and all associated data structures | ||
349 | * are no longer needed. | ||
350 | */ | ||
351 | unsigned long allocated[SLOTBITS_IN_LONGS]; | ||
352 | /* | ||
353 | * Used to queue commands when an internal command is in progress | ||
354 | * or error handling is active | ||
355 | */ | ||
356 | unsigned long cmds_to_issue[SLOTBITS_IN_LONGS]; | ||
357 | /* | ||
358 | * Array of command slots. Structure includes pointers to the | ||
359 | * command header and command table, and completion function and data | ||
360 | * pointers. | ||
361 | */ | ||
362 | struct mtip_cmd commands[MTIP_MAX_COMMAND_SLOTS]; | ||
363 | /* Used by mtip_service_thread to wait for an event */ | ||
364 | wait_queue_head_t svc_wait; | ||
365 | /* | ||
366 | * Indicates the state of the port. Also helps the service thread | ||
367 | * to determine its action on wake up. | ||
368 | */ | ||
369 | unsigned long flags; | ||
370 | /* | ||
371 | * Timer used to complete commands that have been active for too long. | ||
372 | */ | ||
373 | struct timer_list cmd_timer; | ||
374 | /* | ||
375 | * Semaphore used to block threads if there are no | ||
376 | * command slots available. | ||
377 | */ | ||
378 | struct semaphore cmd_slot; | ||
379 | /* Spinlock for working around command-issue bug. */ | ||
380 | spinlock_t cmd_issue_lock; | ||
381 | }; | ||
382 | |||
383 | /* | ||
384 | * Driver private data structure. | ||
385 | * | ||
386 | * One structure is allocated per probed device. | ||
387 | */ | ||
388 | struct driver_data { | ||
389 | void __iomem *mmio; /* Base address of the HBA registers. */ | ||
390 | |||
391 | int major; /* Major device number. */ | ||
392 | |||
393 | int instance; /* Instance number. First device probed is 0, ... */ | ||
394 | |||
395 | struct gendisk *disk; /* Pointer to our gendisk structure. */ | ||
396 | |||
397 | struct pci_dev *pdev; /* Pointer to the PCI device structure. */ | ||
398 | |||
399 | struct request_queue *queue; /* Our request queue. */ | ||
400 | |||
401 | struct mtip_port *port; /* Pointer to the port data structure. */ | ||
402 | |||
403 | /* Tasklet used to process the bottom half of the ISR. */ | ||
404 | struct tasklet_struct tasklet; | ||
405 | |||
406 | unsigned product_type; /* magic value declaring the product type */ | ||
407 | |||
408 | unsigned slot_groups; /* number of slot groups the product supports */ | ||
409 | |||
410 | atomic_t drv_cleanup_done; /* Atomic variable for SRSI */ | ||
411 | |||
412 | unsigned long index; /* Index to determine the disk name */ | ||
413 | |||
414 | unsigned int ftlrebuildflag; /* FTL rebuild flag */ | ||
415 | |||
416 | atomic_t resumeflag; /* Atomic variable to track suspend/resume */ | ||
417 | |||
418 | atomic_t eh_active; /* Flag for error handling tracking */ | ||
419 | |||
420 | struct task_struct *mtip_svc_handler; /* task_struct of svc thd */ | ||
421 | }; | ||
422 | |||
423 | #endif | ||
424 |
drivers/block/xen-blkback/blkback.c
1 | /****************************************************************************** | 1 | /****************************************************************************** |
2 | * | 2 | * |
3 | * Back-end of the driver for virtual block devices. This portion of the | 3 | * Back-end of the driver for virtual block devices. This portion of the |
4 | * driver exports a 'unified' block-device interface that can be accessed | 4 | * driver exports a 'unified' block-device interface that can be accessed |
5 | * by any operating system that implements a compatible front end. A | 5 | * by any operating system that implements a compatible front end. A |
6 | * reference front-end implementation can be found in: | 6 | * reference front-end implementation can be found in: |
7 | * drivers/block/xen-blkfront.c | 7 | * drivers/block/xen-blkfront.c |
8 | * | 8 | * |
9 | * Copyright (c) 2003-2004, Keir Fraser & Steve Hand | 9 | * Copyright (c) 2003-2004, Keir Fraser & Steve Hand |
10 | * Copyright (c) 2005, Christopher Clark | 10 | * Copyright (c) 2005, Christopher Clark |
11 | * | 11 | * |
12 | * This program is free software; you can redistribute it and/or | 12 | * This program is free software; you can redistribute it and/or |
13 | * modify it under the terms of the GNU General Public License version 2 | 13 | * modify it under the terms of the GNU General Public License version 2 |
14 | * as published by the Free Software Foundation; or, when distributed | 14 | * as published by the Free Software Foundation; or, when distributed |
15 | * separately from the Linux kernel or incorporated into other | 15 | * separately from the Linux kernel or incorporated into other |
16 | * software packages, subject to the following license: | 16 | * software packages, subject to the following license: |
17 | * | 17 | * |
18 | * Permission is hereby granted, free of charge, to any person obtaining a copy | 18 | * Permission is hereby granted, free of charge, to any person obtaining a copy |
19 | * of this source file (the "Software"), to deal in the Software without | 19 | * of this source file (the "Software"), to deal in the Software without |
20 | * restriction, including without limitation the rights to use, copy, modify, | 20 | * restriction, including without limitation the rights to use, copy, modify, |
21 | * merge, publish, distribute, sublicense, and/or sell copies of the Software, | 21 | * merge, publish, distribute, sublicense, and/or sell copies of the Software, |
22 | * and to permit persons to whom the Software is furnished to do so, subject to | 22 | * and to permit persons to whom the Software is furnished to do so, subject to |
23 | * the following conditions: | 23 | * the following conditions: |
24 | * | 24 | * |
25 | * The above copyright notice and this permission notice shall be included in | 25 | * The above copyright notice and this permission notice shall be included in |
26 | * all copies or substantial portions of the Software. | 26 | * all copies or substantial portions of the Software. |
27 | * | 27 | * |
28 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | 28 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
29 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | 29 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
30 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | 30 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
31 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | 31 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
32 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | 32 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING |
33 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS | 33 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS |
34 | * IN THE SOFTWARE. | 34 | * IN THE SOFTWARE. |
35 | */ | 35 | */ |
36 | 36 | ||
37 | #include <linux/spinlock.h> | 37 | #include <linux/spinlock.h> |
38 | #include <linux/kthread.h> | 38 | #include <linux/kthread.h> |
39 | #include <linux/list.h> | 39 | #include <linux/list.h> |
40 | #include <linux/delay.h> | 40 | #include <linux/delay.h> |
41 | #include <linux/freezer.h> | 41 | #include <linux/freezer.h> |
42 | #include <linux/loop.h> | ||
43 | #include <linux/falloc.h> | ||
44 | #include <linux/fs.h> | ||
45 | 42 | ||
46 | #include <xen/events.h> | 43 | #include <xen/events.h> |
47 | #include <xen/page.h> | 44 | #include <xen/page.h> |
48 | #include <asm/xen/hypervisor.h> | 45 | #include <asm/xen/hypervisor.h> |
49 | #include <asm/xen/hypercall.h> | 46 | #include <asm/xen/hypercall.h> |
50 | #include "common.h" | 47 | #include "common.h" |
51 | 48 | ||
52 | /* | 49 | /* |
53 | * These are rather arbitrary. They are fairly large because adjacent requests | 50 | * These are rather arbitrary. They are fairly large because adjacent requests |
54 | * pulled from a communication ring are quite likely to end up being part of | 51 | * pulled from a communication ring are quite likely to end up being part of |
55 | * the same scatter/gather request at the disc. | 52 | * the same scatter/gather request at the disc. |
56 | * | 53 | * |
57 | * ** TRY INCREASING 'xen_blkif_reqs' IF WRITE SPEEDS SEEM TOO LOW ** | 54 | * ** TRY INCREASING 'xen_blkif_reqs' IF WRITE SPEEDS SEEM TOO LOW ** |
58 | * | 55 | * |
59 | * This will increase the chances of being able to write whole tracks. | 56 | * This will increase the chances of being able to write whole tracks. |
60 | * 64 should be enough to keep us competitive with Linux. | 57 | * 64 should be enough to keep us competitive with Linux. |
61 | */ | 58 | */ |
62 | static int xen_blkif_reqs = 64; | 59 | static int xen_blkif_reqs = 64; |
63 | module_param_named(reqs, xen_blkif_reqs, int, 0); | 60 | module_param_named(reqs, xen_blkif_reqs, int, 0); |
64 | MODULE_PARM_DESC(reqs, "Number of blkback requests to allocate"); | 61 | MODULE_PARM_DESC(reqs, "Number of blkback requests to allocate"); |
65 | 62 | ||
66 | /* Run-time switchable: /sys/module/blkback/parameters/ */ | 63 | /* Run-time switchable: /sys/module/blkback/parameters/ */ |
67 | static unsigned int log_stats; | 64 | static unsigned int log_stats; |
68 | module_param(log_stats, int, 0644); | 65 | module_param(log_stats, int, 0644); |
69 | 66 | ||
70 | /* | 67 | /* |
71 | * Each outstanding request that we've passed to the lower device layers has a | 68 | * Each outstanding request that we've passed to the lower device layers has a |
72 | * 'pending_req' allocated to it. Each buffer_head that completes decrements | 69 | * 'pending_req' allocated to it. Each buffer_head that completes decrements |
73 | * the pendcnt towards zero. When it hits zero, the specified domain has a | 70 | * the pendcnt towards zero. When it hits zero, the specified domain has a |
74 | * response queued for it, with the saved 'id' passed back. | 71 | * response queued for it, with the saved 'id' passed back. |
75 | */ | 72 | */ |
76 | struct pending_req { | 73 | struct pending_req { |
77 | struct xen_blkif *blkif; | 74 | struct xen_blkif *blkif; |
78 | u64 id; | 75 | u64 id; |
79 | int nr_pages; | 76 | int nr_pages; |
80 | atomic_t pendcnt; | 77 | atomic_t pendcnt; |
81 | unsigned short operation; | 78 | unsigned short operation; |
82 | int status; | 79 | int status; |
83 | struct list_head free_list; | 80 | struct list_head free_list; |
84 | }; | 81 | }; |
85 | 82 | ||
86 | #define BLKBACK_INVALID_HANDLE (~0) | 83 | #define BLKBACK_INVALID_HANDLE (~0) |
87 | 84 | ||
88 | struct xen_blkbk { | 85 | struct xen_blkbk { |
89 | struct pending_req *pending_reqs; | 86 | struct pending_req *pending_reqs; |
90 | /* List of all 'pending_req' available */ | 87 | /* List of all 'pending_req' available */ |
91 | struct list_head pending_free; | 88 | struct list_head pending_free; |
92 | /* And its spinlock. */ | 89 | /* And its spinlock. */ |
93 | spinlock_t pending_free_lock; | 90 | spinlock_t pending_free_lock; |
94 | wait_queue_head_t pending_free_wq; | 91 | wait_queue_head_t pending_free_wq; |
95 | /* The list of all pages that are available. */ | 92 | /* The list of all pages that are available. */ |
96 | struct page **pending_pages; | 93 | struct page **pending_pages; |
97 | /* And the grant handles that are available. */ | 94 | /* And the grant handles that are available. */ |
98 | grant_handle_t *pending_grant_handles; | 95 | grant_handle_t *pending_grant_handles; |
99 | }; | 96 | }; |
100 | 97 | ||
101 | static struct xen_blkbk *blkbk; | 98 | static struct xen_blkbk *blkbk; |
102 | 99 | ||
103 | /* | 100 | /* |
104 | * Little helper macro to figure out the index and virtual address of the | 101 | * Little helper macro to figure out the index and virtual address of the
105 | * pending_pages[..]. For each 'pending_req' we have up to | 102 | * pending_pages[..]. For each 'pending_req' we have up to
106 | * BLKIF_MAX_SEGMENTS_PER_REQUEST (11) pages. The seg would be from 0 through | 103 | * BLKIF_MAX_SEGMENTS_PER_REQUEST (11) pages. The seg would be from 0 through
107 | * 10 and would index into the pending_pages[..]. | 104 | * 10 and would index into the pending_pages[..].
108 | */ | 105 | */ |
109 | static inline int vaddr_pagenr(struct pending_req *req, int seg) | 106 | static inline int vaddr_pagenr(struct pending_req *req, int seg) |
110 | { | 107 | { |
111 | return (req - blkbk->pending_reqs) * | 108 | return (req - blkbk->pending_reqs) * |
112 | BLKIF_MAX_SEGMENTS_PER_REQUEST + seg; | 109 | BLKIF_MAX_SEGMENTS_PER_REQUEST + seg; |
113 | } | 110 | } |
114 | 111 | ||
115 | #define pending_page(req, seg) pending_pages[vaddr_pagenr(req, seg)] | 112 | #define pending_page(req, seg) pending_pages[vaddr_pagenr(req, seg)] |
116 | 113 | ||
117 | static inline unsigned long vaddr(struct pending_req *req, int seg) | 114 | static inline unsigned long vaddr(struct pending_req *req, int seg) |
118 | { | 115 | { |
119 | unsigned long pfn = page_to_pfn(blkbk->pending_page(req, seg)); | 116 | unsigned long pfn = page_to_pfn(blkbk->pending_page(req, seg)); |
120 | return (unsigned long)pfn_to_kaddr(pfn); | 117 | return (unsigned long)pfn_to_kaddr(pfn); |
121 | } | 118 | } |
122 | 119 | ||
123 | #define pending_handle(_req, _seg) \ | 120 | #define pending_handle(_req, _seg) \ |
124 | (blkbk->pending_grant_handles[vaddr_pagenr(_req, _seg)]) | 121 | (blkbk->pending_grant_handles[vaddr_pagenr(_req, _seg)]) |
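/*
 * Worked example (editor's illustration, not part of the patch): for
 * the third pending_req (req - blkbk->pending_reqs == 2) and segment 4,
 * vaddr_pagenr(req, 4) == 2 * 11 + 4 == 26, so pending_page(req, 4) is
 * blkbk->pending_pages[26] and pending_handle(req, 4) is
 * blkbk->pending_grant_handles[26].
 */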
125 | 122 | ||
126 | 123 | ||
127 | static int do_block_io_op(struct xen_blkif *blkif); | 124 | static int do_block_io_op(struct xen_blkif *blkif); |
128 | static int dispatch_rw_block_io(struct xen_blkif *blkif, | 125 | static int dispatch_rw_block_io(struct xen_blkif *blkif, |
129 | struct blkif_request *req, | 126 | struct blkif_request *req, |
130 | struct pending_req *pending_req); | 127 | struct pending_req *pending_req); |
131 | static void make_response(struct xen_blkif *blkif, u64 id, | 128 | static void make_response(struct xen_blkif *blkif, u64 id, |
132 | unsigned short op, int st); | 129 | unsigned short op, int st); |
133 | 130 | ||
134 | /* | 131 | /* |
135 | * Retrieve from the 'pending_reqs' a free pending_req structure to be used. | 132 | * Retrieve from the 'pending_reqs' a free pending_req structure to be used. |
136 | */ | 133 | */ |
137 | static struct pending_req *alloc_req(void) | 134 | static struct pending_req *alloc_req(void) |
138 | { | 135 | { |
139 | struct pending_req *req = NULL; | 136 | struct pending_req *req = NULL; |
140 | unsigned long flags; | 137 | unsigned long flags; |
141 | 138 | ||
142 | spin_lock_irqsave(&blkbk->pending_free_lock, flags); | 139 | spin_lock_irqsave(&blkbk->pending_free_lock, flags); |
143 | if (!list_empty(&blkbk->pending_free)) { | 140 | if (!list_empty(&blkbk->pending_free)) { |
144 | req = list_entry(blkbk->pending_free.next, struct pending_req, | 141 | req = list_entry(blkbk->pending_free.next, struct pending_req, |
145 | free_list); | 142 | free_list); |
146 | list_del(&req->free_list); | 143 | list_del(&req->free_list); |
147 | } | 144 | } |
148 | spin_unlock_irqrestore(&blkbk->pending_free_lock, flags); | 145 | spin_unlock_irqrestore(&blkbk->pending_free_lock, flags); |
149 | return req; | 146 | return req; |
150 | } | 147 | } |
151 | 148 | ||
152 | /* | 149 | /* |
153 | * Return the 'pending_req' structure back to the freepool. We also | 150 | * Return the 'pending_req' structure back to the freepool. We also |
154 | * wake up the thread if it was waiting for a free page. | 151 | * wake up the thread if it was waiting for a free page. |
155 | */ | 152 | */ |
156 | static void free_req(struct pending_req *req) | 153 | static void free_req(struct pending_req *req) |
157 | { | 154 | { |
158 | unsigned long flags; | 155 | unsigned long flags; |
159 | int was_empty; | 156 | int was_empty; |
160 | 157 | ||
161 | spin_lock_irqsave(&blkbk->pending_free_lock, flags); | 158 | spin_lock_irqsave(&blkbk->pending_free_lock, flags); |
162 | was_empty = list_empty(&blkbk->pending_free); | 159 | was_empty = list_empty(&blkbk->pending_free); |
163 | list_add(&req->free_list, &blkbk->pending_free); | 160 | list_add(&req->free_list, &blkbk->pending_free); |
164 | spin_unlock_irqrestore(&blkbk->pending_free_lock, flags); | 161 | spin_unlock_irqrestore(&blkbk->pending_free_lock, flags); |
165 | if (was_empty) | 162 | if (was_empty) |
166 | wake_up(&blkbk->pending_free_wq); | 163 | wake_up(&blkbk->pending_free_wq); |
167 | } | 164 | } |
168 | 165 | ||
169 | /* | 166 | /* |
170 | * Routines for managing virtual block devices (vbds). | 167 | * Routines for managing virtual block devices (vbds). |
171 | */ | 168 | */ |
172 | static int xen_vbd_translate(struct phys_req *req, struct xen_blkif *blkif, | 169 | static int xen_vbd_translate(struct phys_req *req, struct xen_blkif *blkif, |
173 | int operation) | 170 | int operation) |
174 | { | 171 | { |
175 | struct xen_vbd *vbd = &blkif->vbd; | 172 | struct xen_vbd *vbd = &blkif->vbd; |
176 | int rc = -EACCES; | 173 | int rc = -EACCES; |
177 | 174 | ||
178 | if ((operation != READ) && vbd->readonly) | 175 | if ((operation != READ) && vbd->readonly) |
179 | goto out; | 176 | goto out; |
180 | 177 | ||
181 | if (likely(req->nr_sects)) { | 178 | if (likely(req->nr_sects)) { |
182 | blkif_sector_t end = req->sector_number + req->nr_sects; | 179 | blkif_sector_t end = req->sector_number + req->nr_sects; |
183 | 180 | ||
184 | if (unlikely(end < req->sector_number)) | 181 | if (unlikely(end < req->sector_number)) |
185 | goto out; | 182 | goto out; |
186 | if (unlikely(end > vbd_sz(vbd))) | 183 | if (unlikely(end > vbd_sz(vbd))) |
187 | goto out; | 184 | goto out; |
188 | } | 185 | } |
189 | 186 | ||
190 | req->dev = vbd->pdevice; | 187 | req->dev = vbd->pdevice; |
191 | req->bdev = vbd->bdev; | 188 | req->bdev = vbd->bdev; |
192 | rc = 0; | 189 | rc = 0; |
193 | 190 | ||
194 | out: | 191 | out: |
195 | return rc; | 192 | return rc; |
196 | } | 193 | } |
197 | 194 | ||
198 | static void xen_vbd_resize(struct xen_blkif *blkif) | 195 | static void xen_vbd_resize(struct xen_blkif *blkif) |
199 | { | 196 | { |
200 | struct xen_vbd *vbd = &blkif->vbd; | 197 | struct xen_vbd *vbd = &blkif->vbd; |
201 | struct xenbus_transaction xbt; | 198 | struct xenbus_transaction xbt; |
202 | int err; | 199 | int err; |
203 | struct xenbus_device *dev = xen_blkbk_xenbus(blkif->be); | 200 | struct xenbus_device *dev = xen_blkbk_xenbus(blkif->be); |
204 | unsigned long long new_size = vbd_sz(vbd); | 201 | unsigned long long new_size = vbd_sz(vbd); |
205 | 202 | ||
206 | pr_info(DRV_PFX "VBD Resize: Domid: %d, Device: (%d, %d)\n", | 203 | pr_info(DRV_PFX "VBD Resize: Domid: %d, Device: (%d, %d)\n", |
207 | blkif->domid, MAJOR(vbd->pdevice), MINOR(vbd->pdevice)); | 204 | blkif->domid, MAJOR(vbd->pdevice), MINOR(vbd->pdevice)); |
208 | pr_info(DRV_PFX "VBD Resize: new size %llu\n", new_size); | 205 | pr_info(DRV_PFX "VBD Resize: new size %llu\n", new_size); |
209 | vbd->size = new_size; | 206 | vbd->size = new_size; |
210 | again: | 207 | again: |
211 | err = xenbus_transaction_start(&xbt); | 208 | err = xenbus_transaction_start(&xbt); |
212 | if (err) { | 209 | if (err) { |
213 | pr_warn(DRV_PFX "Error starting transaction"); | 210 | pr_warn(DRV_PFX "Error starting transaction"); |
214 | return; | 211 | return; |
215 | } | 212 | } |
216 | err = xenbus_printf(xbt, dev->nodename, "sectors", "%llu", | 213 | err = xenbus_printf(xbt, dev->nodename, "sectors", "%llu", |
217 | (unsigned long long)vbd_sz(vbd)); | 214 | (unsigned long long)vbd_sz(vbd)); |
218 | if (err) { | 215 | if (err) { |
219 | pr_warn(DRV_PFX "Error writing new size"); | 216 | pr_warn(DRV_PFX "Error writing new size"); |
220 | goto abort; | 217 | goto abort; |
221 | } | 218 | } |
222 | /* | 219 | /* |
223 | * Write the current state; we will use this to synchronize | 220 | * Write the current state; we will use this to synchronize |
224 | * the front-end. If the current state is "connected" the | 221 | * the front-end. If the current state is "connected" the |
225 | * front-end will get the new size information online. | 222 | * front-end will get the new size information online. |
226 | */ | 223 | */ |
227 | err = xenbus_printf(xbt, dev->nodename, "state", "%d", dev->state); | 224 | err = xenbus_printf(xbt, dev->nodename, "state", "%d", dev->state); |
228 | if (err) { | 225 | if (err) { |
229 | pr_warn(DRV_PFX "Error writing the state"); | 226 | pr_warn(DRV_PFX "Error writing the state"); |
230 | goto abort; | 227 | goto abort; |
231 | } | 228 | } |
232 | 229 | ||
233 | err = xenbus_transaction_end(xbt, 0); | 230 | err = xenbus_transaction_end(xbt, 0); |
234 | if (err == -EAGAIN) | 231 | if (err == -EAGAIN) |
235 | goto again; | 232 | goto again; |
236 | if (err) | 233 | if (err) |
237 | pr_warn(DRV_PFX "Error ending transaction"); | 234 | pr_warn(DRV_PFX "Error ending transaction"); |
238 | return; | 235 | return; |
239 | abort: | 236 | abort: |
240 | xenbus_transaction_end(xbt, 1); | 237 | xenbus_transaction_end(xbt, 1); |
241 | } | 238 | } |
242 | 239 | ||
243 | /* | 240 | /* |
244 | * Notification from the guest OS. | 241 | * Notification from the guest OS. |
245 | */ | 242 | */ |
246 | static void blkif_notify_work(struct xen_blkif *blkif) | 243 | static void blkif_notify_work(struct xen_blkif *blkif) |
247 | { | 244 | { |
248 | blkif->waiting_reqs = 1; | 245 | blkif->waiting_reqs = 1; |
249 | wake_up(&blkif->wq); | 246 | wake_up(&blkif->wq); |
250 | } | 247 | } |
251 | 248 | ||
252 | irqreturn_t xen_blkif_be_int(int irq, void *dev_id) | 249 | irqreturn_t xen_blkif_be_int(int irq, void *dev_id) |
253 | { | 250 | { |
254 | blkif_notify_work(dev_id); | 251 | blkif_notify_work(dev_id); |
255 | return IRQ_HANDLED; | 252 | return IRQ_HANDLED; |
256 | } | 253 | } |
257 | 254 | ||
258 | /* | 255 | /* |
259 | * SCHEDULER FUNCTIONS | 256 | * SCHEDULER FUNCTIONS |
260 | */ | 257 | */ |
261 | 258 | ||
262 | static void print_stats(struct xen_blkif *blkif) | 259 | static void print_stats(struct xen_blkif *blkif) |
263 | { | 260 | { |
264 | pr_info("xen-blkback (%s): oo %3d | rd %4d | wr %4d | f %4d" | 261 | pr_info("xen-blkback (%s): oo %3d | rd %4d | wr %4d | f %4d" |
265 | " | ds %4d\n", | 262 | " | ds %4d\n", |
266 | current->comm, blkif->st_oo_req, | 263 | current->comm, blkif->st_oo_req, |
267 | blkif->st_rd_req, blkif->st_wr_req, | 264 | blkif->st_rd_req, blkif->st_wr_req, |
268 | blkif->st_f_req, blkif->st_ds_req); | 265 | blkif->st_f_req, blkif->st_ds_req); |
269 | blkif->st_print = jiffies + msecs_to_jiffies(10 * 1000); | 266 | blkif->st_print = jiffies + msecs_to_jiffies(10 * 1000); |
270 | blkif->st_rd_req = 0; | 267 | blkif->st_rd_req = 0; |
271 | blkif->st_wr_req = 0; | 268 | blkif->st_wr_req = 0; |
272 | blkif->st_oo_req = 0; | 269 | blkif->st_oo_req = 0; |
273 | blkif->st_ds_req = 0; | 270 | blkif->st_ds_req = 0; |
274 | } | 271 | } |
275 | 272 | ||
276 | int xen_blkif_schedule(void *arg) | 273 | int xen_blkif_schedule(void *arg) |
277 | { | 274 | { |
278 | struct xen_blkif *blkif = arg; | 275 | struct xen_blkif *blkif = arg; |
279 | struct xen_vbd *vbd = &blkif->vbd; | 276 | struct xen_vbd *vbd = &blkif->vbd; |
280 | 277 | ||
281 | xen_blkif_get(blkif); | 278 | xen_blkif_get(blkif); |
282 | 279 | ||
283 | while (!kthread_should_stop()) { | 280 | while (!kthread_should_stop()) { |
284 | if (try_to_freeze()) | 281 | if (try_to_freeze()) |
285 | continue; | 282 | continue; |
286 | if (unlikely(vbd->size != vbd_sz(vbd))) | 283 | if (unlikely(vbd->size != vbd_sz(vbd))) |
287 | xen_vbd_resize(blkif); | 284 | xen_vbd_resize(blkif); |
288 | 285 | ||
289 | wait_event_interruptible( | 286 | wait_event_interruptible( |
290 | blkif->wq, | 287 | blkif->wq, |
291 | blkif->waiting_reqs || kthread_should_stop()); | 288 | blkif->waiting_reqs || kthread_should_stop()); |
292 | wait_event_interruptible( | 289 | wait_event_interruptible( |
293 | blkbk->pending_free_wq, | 290 | blkbk->pending_free_wq, |
294 | !list_empty(&blkbk->pending_free) || | 291 | !list_empty(&blkbk->pending_free) || |
295 | kthread_should_stop()); | 292 | kthread_should_stop()); |
296 | 293 | ||
297 | blkif->waiting_reqs = 0; | 294 | blkif->waiting_reqs = 0; |
298 | smp_mb(); /* clear flag *before* checking for work */ | 295 | smp_mb(); /* clear flag *before* checking for work */ |
299 | 296 | ||
300 | if (do_block_io_op(blkif)) | 297 | if (do_block_io_op(blkif)) |
301 | blkif->waiting_reqs = 1; | 298 | blkif->waiting_reqs = 1; |
302 | 299 | ||
303 | if (log_stats && time_after(jiffies, blkif->st_print)) | 300 | if (log_stats && time_after(jiffies, blkif->st_print)) |
304 | print_stats(blkif); | 301 | print_stats(blkif); |
305 | } | 302 | } |
306 | 303 | ||
307 | if (log_stats) | 304 | if (log_stats) |
308 | print_stats(blkif); | 305 | print_stats(blkif); |
309 | 306 | ||
310 | blkif->xenblkd = NULL; | 307 | blkif->xenblkd = NULL; |
311 | xen_blkif_put(blkif); | 308 | xen_blkif_put(blkif); |
312 | 309 | ||
313 | return 0; | 310 | return 0; |
314 | } | 311 | } |
315 | 312 | ||
316 | struct seg_buf { | 313 | struct seg_buf { |
317 | unsigned long buf; | 314 | unsigned long buf; |
318 | unsigned int nsec; | 315 | unsigned int nsec; |
319 | }; | 316 | }; |
320 | /* | 317 | /* |
321 | * Unmap the grant references, and also remove the M2P over-rides | 318 | * Unmap the grant references, and also remove the M2P over-rides |
322 | * used in the 'pending_req'. | 319 | * used in the 'pending_req'. |
323 | */ | 320 | */ |
324 | static void xen_blkbk_unmap(struct pending_req *req) | 321 | static void xen_blkbk_unmap(struct pending_req *req) |
325 | { | 322 | { |
326 | struct gnttab_unmap_grant_ref unmap[BLKIF_MAX_SEGMENTS_PER_REQUEST]; | 323 | struct gnttab_unmap_grant_ref unmap[BLKIF_MAX_SEGMENTS_PER_REQUEST]; |
327 | unsigned int i, invcount = 0; | 324 | unsigned int i, invcount = 0; |
328 | grant_handle_t handle; | 325 | grant_handle_t handle; |
329 | int ret; | 326 | int ret; |
330 | 327 | ||
331 | for (i = 0; i < req->nr_pages; i++) { | 328 | for (i = 0; i < req->nr_pages; i++) { |
332 | handle = pending_handle(req, i); | 329 | handle = pending_handle(req, i); |
333 | if (handle == BLKBACK_INVALID_HANDLE) | 330 | if (handle == BLKBACK_INVALID_HANDLE) |
334 | continue; | 331 | continue; |
335 | gnttab_set_unmap_op(&unmap[invcount], vaddr(req, i), | 332 | gnttab_set_unmap_op(&unmap[invcount], vaddr(req, i), |
336 | GNTMAP_host_map, handle); | 333 | GNTMAP_host_map, handle); |
337 | pending_handle(req, i) = BLKBACK_INVALID_HANDLE; | 334 | pending_handle(req, i) = BLKBACK_INVALID_HANDLE; |
338 | invcount++; | 335 | invcount++; |
339 | } | 336 | } |
340 | 337 | ||
341 | ret = HYPERVISOR_grant_table_op( | 338 | ret = HYPERVISOR_grant_table_op( |
342 | GNTTABOP_unmap_grant_ref, unmap, invcount); | 339 | GNTTABOP_unmap_grant_ref, unmap, invcount); |
343 | BUG_ON(ret); | 340 | BUG_ON(ret); |
344 | /* | 341 | /* |
345 | * Note, we use invcount, not nr_pages, so we can't index | 342 | * Note, we use invcount, not nr_pages, so we can't index
346 | * using vaddr(req, i). | 343 | * using vaddr(req, i). |
347 | */ | 344 | */ |
348 | for (i = 0; i < invcount; i++) { | 345 | for (i = 0; i < invcount; i++) { |
349 | ret = m2p_remove_override( | 346 | ret = m2p_remove_override( |
350 | virt_to_page(unmap[i].host_addr), false); | 347 | virt_to_page(unmap[i].host_addr), false); |
351 | if (ret) { | 348 | if (ret) { |
352 | pr_alert(DRV_PFX "Failed to remove M2P override for %lx\n", | 349 | pr_alert(DRV_PFX "Failed to remove M2P override for %lx\n", |
353 | (unsigned long)unmap[i].host_addr); | 350 | (unsigned long)unmap[i].host_addr); |
354 | continue; | 351 | continue; |
355 | } | 352 | } |
356 | } | 353 | } |
357 | } | 354 | } |
358 | 355 | ||
359 | static int xen_blkbk_map(struct blkif_request *req, | 356 | static int xen_blkbk_map(struct blkif_request *req, |
360 | struct pending_req *pending_req, | 357 | struct pending_req *pending_req, |
361 | struct seg_buf seg[]) | 358 | struct seg_buf seg[]) |
362 | { | 359 | { |
363 | struct gnttab_map_grant_ref map[BLKIF_MAX_SEGMENTS_PER_REQUEST]; | 360 | struct gnttab_map_grant_ref map[BLKIF_MAX_SEGMENTS_PER_REQUEST]; |
364 | int i; | 361 | int i; |
365 | int nseg = req->nr_segments; | 362 | int nseg = req->u.rw.nr_segments; |
366 | int ret = 0; | 363 | int ret = 0; |
367 | 364 | ||
368 | /* | 365 | /* |
369 | * Fill out preq.nr_sects with the proper number of sectors, and | 366 | * Fill out preq.nr_sects with the proper number of sectors, and
370 | * assign map[..] with the PFN of the page in our domain with the | 367 | * assign map[..] with the PFN of the page in our domain with the |
371 | * corresponding grant reference for each page. | 368 | * corresponding grant reference for each page. |
372 | */ | 369 | */ |
373 | for (i = 0; i < nseg; i++) { | 370 | for (i = 0; i < nseg; i++) { |
374 | uint32_t flags; | 371 | uint32_t flags; |
375 | 372 | ||
376 | flags = GNTMAP_host_map; | 373 | flags = GNTMAP_host_map; |
377 | if (pending_req->operation != BLKIF_OP_READ) | 374 | if (pending_req->operation != BLKIF_OP_READ) |
378 | flags |= GNTMAP_readonly; | 375 | flags |= GNTMAP_readonly; |
379 | gnttab_set_map_op(&map[i], vaddr(pending_req, i), flags, | 376 | gnttab_set_map_op(&map[i], vaddr(pending_req, i), flags, |
380 | req->u.rw.seg[i].gref, | 377 | req->u.rw.seg[i].gref, |
381 | pending_req->blkif->domid); | 378 | pending_req->blkif->domid); |
382 | } | 379 | } |
383 | 380 | ||
384 | ret = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, map, nseg); | 381 | ret = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, map, nseg); |
385 | BUG_ON(ret); | 382 | BUG_ON(ret); |
386 | 383 | ||
387 | /* | 384 | /* |
388 | * Now swizzle the MFN in our domain with the MFN from the other domain | 385 | * Now swizzle the MFN in our domain with the MFN from the other domain |
389 | * so that when we access vaddr(pending_req,i) it has the contents of | 386 | * so that when we access vaddr(pending_req,i) it has the contents of |
390 | * the page from the other domain. | 387 | * the page from the other domain. |
391 | */ | 388 | */ |
392 | for (i = 0; i < nseg; i++) { | 389 | for (i = 0; i < nseg; i++) { |
393 | if (unlikely(map[i].status != 0)) { | 390 | if (unlikely(map[i].status != 0)) { |
394 | pr_debug(DRV_PFX "invalid buffer -- could not remap it\n"); | 391 | pr_debug(DRV_PFX "invalid buffer -- could not remap it\n"); |
395 | map[i].handle = BLKBACK_INVALID_HANDLE; | 392 | map[i].handle = BLKBACK_INVALID_HANDLE; |
396 | ret |= 1; | 393 | ret |= 1; |
397 | } | 394 | } |
398 | 395 | ||
399 | pending_handle(pending_req, i) = map[i].handle; | 396 | pending_handle(pending_req, i) = map[i].handle; |
400 | 397 | ||
401 | if (ret) | 398 | if (ret) |
402 | continue; | 399 | continue; |
403 | 400 | ||
404 | ret = m2p_add_override(PFN_DOWN(map[i].dev_bus_addr), | 401 | ret = m2p_add_override(PFN_DOWN(map[i].dev_bus_addr), |
405 | blkbk->pending_page(pending_req, i), NULL); | 402 | blkbk->pending_page(pending_req, i), NULL); |
406 | if (ret) { | 403 | if (ret) { |
407 | pr_alert(DRV_PFX "Failed to install M2P override for %lx (ret: %d)\n", | 404 | pr_alert(DRV_PFX "Failed to install M2P override for %lx (ret: %d)\n", |
408 | (unsigned long)map[i].dev_bus_addr, ret); | 405 | (unsigned long)map[i].dev_bus_addr, ret); |
409 | /* We could switch over to GNTTABOP_copy */ | 406 | /* We could switch over to GNTTABOP_copy */ |
410 | continue; | 407 | continue; |
411 | } | 408 | } |
412 | 409 | ||
413 | seg[i].buf = map[i].dev_bus_addr | | 410 | seg[i].buf = map[i].dev_bus_addr | |
414 | (req->u.rw.seg[i].first_sect << 9); | 411 | (req->u.rw.seg[i].first_sect << 9); |
415 | } | 412 | } |
416 | return ret; | 413 | return ret; |
417 | } | 414 | } |
418 | 415 | ||
419 | static void xen_blk_discard(struct xen_blkif *blkif, struct blkif_request *req) | 416 | static int dispatch_discard_io(struct xen_blkif *blkif, |
417 | struct blkif_request *req) | ||
420 | { | 418 | { |
421 | int err = 0; | 419 | int err = 0; |
422 | int status = BLKIF_RSP_OKAY; | 420 | int status = BLKIF_RSP_OKAY; |
423 | struct block_device *bdev = blkif->vbd.bdev; | 421 | struct block_device *bdev = blkif->vbd.bdev; |
424 | 422 | ||
425 | if (blkif->blk_backend_type == BLKIF_BACKEND_PHY) | 423 | blkif->st_ds_req++; |
426 | /* just forward the discard request */ | 424 | |
425 | xen_blkif_get(blkif); | ||
426 | if (blkif->blk_backend_type == BLKIF_BACKEND_PHY || | ||
427 | blkif->blk_backend_type == BLKIF_BACKEND_FILE) { | ||
428 | unsigned long secure = (blkif->vbd.discard_secure && | ||
429 | (req->u.discard.flag & BLKIF_DISCARD_SECURE)) ? | ||
430 | BLKDEV_DISCARD_SECURE : 0; | ||
427 | err = blkdev_issue_discard(bdev, | 431 | err = blkdev_issue_discard(bdev, |
428 | req->u.discard.sector_number, | 432 | req->u.discard.sector_number, |
429 | req->u.discard.nr_sectors, | 433 | req->u.discard.nr_sectors, |
430 | GFP_KERNEL, 0); | 434 | GFP_KERNEL, secure); |
431 | else if (blkif->blk_backend_type == BLKIF_BACKEND_FILE) { | ||
432 | /* punch a hole in the backing file */ | ||
433 | struct loop_device *lo = bdev->bd_disk->private_data; | ||
434 | struct file *file = lo->lo_backing_file; | ||
435 | |||
436 | if (file->f_op->fallocate) | ||
437 | err = file->f_op->fallocate(file, | ||
438 | FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE, | ||
439 | req->u.discard.sector_number << 9, | ||
440 | req->u.discard.nr_sectors << 9); | ||
441 | else | ||
442 | err = -EOPNOTSUPP; | ||
443 | } else | 435 | } else |
444 | err = -EOPNOTSUPP; | 436 | err = -EOPNOTSUPP; |
445 | 437 | ||
446 | if (err == -EOPNOTSUPP) { | 438 | if (err == -EOPNOTSUPP) { |
447 | pr_debug(DRV_PFX "discard op failed, not supported\n"); | 439 | pr_debug(DRV_PFX "discard op failed, not supported\n"); |
448 | status = BLKIF_RSP_EOPNOTSUPP; | 440 | status = BLKIF_RSP_EOPNOTSUPP; |
449 | } else if (err) | 441 | } else if (err) |
450 | status = BLKIF_RSP_ERROR; | 442 | status = BLKIF_RSP_ERROR; |
451 | 443 | ||
452 | make_response(blkif, req->id, req->operation, status); | 444 | make_response(blkif, req->u.discard.id, req->operation, status); |
445 | xen_blkif_put(blkif); | ||
446 | return err; | ||
453 | } | 447 | } |
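/*
 * Editor's note (illustration, not part of the patch): with the code
 * above, a frontend discard carrying BLKIF_DISCARD_SECURE on a vbd
 * whose 'discard_secure' is set becomes
 *	blkdev_issue_discard(bdev, sector, nr_sects, GFP_KERNEL,
 *			     BLKDEV_DISCARD_SECURE);
 * while every other case issues an ordinary discard (flags == 0).
 */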
454 | 448 | ||
455 | static void xen_blk_drain_io(struct xen_blkif *blkif) | 449 | static void xen_blk_drain_io(struct xen_blkif *blkif) |
456 | { | 450 | { |
457 | atomic_set(&blkif->drain, 1); | 451 | atomic_set(&blkif->drain, 1); |
458 | do { | 452 | do { |
459 | /* The initial value is one, and one refcnt taken at the | 453 | /* The initial value is one, and one refcnt taken at the |
460 | * start of the xen_blkif_schedule thread. */ | 454 | * start of the xen_blkif_schedule thread. */ |
461 | if (atomic_read(&blkif->refcnt) <= 2) | 455 | if (atomic_read(&blkif->refcnt) <= 2) |
462 | break; | 456 | break; |
463 | wait_for_completion_interruptible_timeout( | 457 | wait_for_completion_interruptible_timeout( |
464 | &blkif->drain_complete, HZ); | 458 | &blkif->drain_complete, HZ); |
465 | 459 | ||
466 | if (!atomic_read(&blkif->drain)) | 460 | if (!atomic_read(&blkif->drain)) |
467 | break; | 461 | break; |
468 | } while (!kthread_should_stop()); | 462 | } while (!kthread_should_stop()); |
469 | atomic_set(&blkif->drain, 0); | 463 | atomic_set(&blkif->drain, 0); |
470 | } | 464 | } |
471 | 465 | ||
472 | /* | 466 | /* |
473 | * Completion callback on the bio's. Called as bh->b_end_io() | 467 | * Completion callback on the bio's. Called as bh->b_end_io() |
474 | */ | 468 | */ |
475 | 469 | ||
476 | static void __end_block_io_op(struct pending_req *pending_req, int error) | 470 | static void __end_block_io_op(struct pending_req *pending_req, int error) |
477 | { | 471 | { |
478 | /* An error fails the entire request. */ | 472 | /* An error fails the entire request. */ |
479 | if ((pending_req->operation == BLKIF_OP_FLUSH_DISKCACHE) && | 473 | if ((pending_req->operation == BLKIF_OP_FLUSH_DISKCACHE) && |
480 | (error == -EOPNOTSUPP)) { | 474 | (error == -EOPNOTSUPP)) { |
481 | pr_debug(DRV_PFX "flush diskcache op failed, not supported\n"); | 475 | pr_debug(DRV_PFX "flush diskcache op failed, not supported\n"); |
482 | xen_blkbk_flush_diskcache(XBT_NIL, pending_req->blkif->be, 0); | 476 | xen_blkbk_flush_diskcache(XBT_NIL, pending_req->blkif->be, 0); |
483 | pending_req->status = BLKIF_RSP_EOPNOTSUPP; | 477 | pending_req->status = BLKIF_RSP_EOPNOTSUPP; |
484 | } else if ((pending_req->operation == BLKIF_OP_WRITE_BARRIER) && | 478 | } else if ((pending_req->operation == BLKIF_OP_WRITE_BARRIER) && |
485 | (error == -EOPNOTSUPP)) { | 479 | (error == -EOPNOTSUPP)) { |
486 | pr_debug(DRV_PFX "write barrier op failed, not supported\n"); | 480 | pr_debug(DRV_PFX "write barrier op failed, not supported\n"); |
487 | xen_blkbk_barrier(XBT_NIL, pending_req->blkif->be, 0); | 481 | xen_blkbk_barrier(XBT_NIL, pending_req->blkif->be, 0); |
488 | pending_req->status = BLKIF_RSP_EOPNOTSUPP; | 482 | pending_req->status = BLKIF_RSP_EOPNOTSUPP; |
489 | } else if (error) { | 483 | } else if (error) { |
490 | pr_debug(DRV_PFX "Buffer not up-to-date at end of operation," | 484 | pr_debug(DRV_PFX "Buffer not up-to-date at end of operation," |
491 | " error=%d\n", error); | 485 | " error=%d\n", error); |
492 | pending_req->status = BLKIF_RSP_ERROR; | 486 | pending_req->status = BLKIF_RSP_ERROR; |
493 | } | 487 | } |
494 | 488 | ||
495 | /* | 489 | /* |
496 | * If all of the bio's have completed it is time to unmap | 490 | * If all of the bio's have completed it is time to unmap |
497 | * the grant references associated with 'request' and provide | 491 | * the grant references associated with 'request' and provide |
498 | * the proper response on the ring. | 492 | * the proper response on the ring. |
499 | */ | 493 | */ |
500 | if (atomic_dec_and_test(&pending_req->pendcnt)) { | 494 | if (atomic_dec_and_test(&pending_req->pendcnt)) { |
501 | xen_blkbk_unmap(pending_req); | 495 | xen_blkbk_unmap(pending_req); |
502 | make_response(pending_req->blkif, pending_req->id, | 496 | make_response(pending_req->blkif, pending_req->id, |
503 | pending_req->operation, pending_req->status); | 497 | pending_req->operation, pending_req->status); |
504 | xen_blkif_put(pending_req->blkif); | 498 | xen_blkif_put(pending_req->blkif); |
505 | if (atomic_read(&pending_req->blkif->refcnt) <= 2) { | 499 | if (atomic_read(&pending_req->blkif->refcnt) <= 2) { |
506 | if (atomic_read(&pending_req->blkif->drain)) | 500 | if (atomic_read(&pending_req->blkif->drain)) |
507 | complete(&pending_req->blkif->drain_complete); | 501 | complete(&pending_req->blkif->drain_complete); |
508 | } | 502 | } |
509 | free_req(pending_req); | 503 | free_req(pending_req); |
510 | } | 504 | } |
511 | } | 505 | } |
512 | 506 | ||
513 | /* | 507 | /* |
514 | * bio callback. | 508 | * bio callback. |
515 | */ | 509 | */ |
516 | static void end_block_io_op(struct bio *bio, int error) | 510 | static void end_block_io_op(struct bio *bio, int error) |
517 | { | 511 | { |
518 | __end_block_io_op(bio->bi_private, error); | 512 | __end_block_io_op(bio->bi_private, error); |
519 | bio_put(bio); | 513 | bio_put(bio); |
520 | } | 514 | } |
521 | 515 | ||
522 | 516 | ||
523 | 517 | ||
524 | /* | 518 | /* |
525 | * Function to copy the 'struct blkif_request' from the ring buffer | 519 | * Function to copy the 'struct blkif_request' from the ring buffer |
526 | * (which has the sectors we want, the number of them, grant references, etc.), | 520 | * (which has the sectors we want, the number of them, grant references, etc.), |
527 | * and transmute it to the block API to hand it over to the proper block disk. | 521 | * and transmute it to the block API to hand it over to the proper block disk. |
528 | */ | 522 | */ |
529 | static int | 523 | static int |
530 | __do_block_io_op(struct xen_blkif *blkif) | 524 | __do_block_io_op(struct xen_blkif *blkif) |
531 | { | 525 | { |
532 | union blkif_back_rings *blk_rings = &blkif->blk_rings; | 526 | union blkif_back_rings *blk_rings = &blkif->blk_rings; |
533 | struct blkif_request req; | 527 | struct blkif_request req; |
534 | struct pending_req *pending_req; | 528 | struct pending_req *pending_req; |
535 | RING_IDX rc, rp; | 529 | RING_IDX rc, rp; |
536 | int more_to_do = 0; | 530 | int more_to_do = 0; |
537 | 531 | ||
538 | rc = blk_rings->common.req_cons; | 532 | rc = blk_rings->common.req_cons; |
539 | rp = blk_rings->common.sring->req_prod; | 533 | rp = blk_rings->common.sring->req_prod; |
540 | rmb(); /* Ensure we see queued requests up to 'rp'. */ | 534 | rmb(); /* Ensure we see queued requests up to 'rp'. */ |
541 | 535 | ||
542 | while (rc != rp) { | 536 | while (rc != rp) { |
543 | 537 | ||
544 | if (RING_REQUEST_CONS_OVERFLOW(&blk_rings->common, rc)) | 538 | if (RING_REQUEST_CONS_OVERFLOW(&blk_rings->common, rc)) |
545 | break; | 539 | break; |
546 | 540 | ||
547 | if (kthread_should_stop()) { | 541 | if (kthread_should_stop()) { |
548 | more_to_do = 1; | 542 | more_to_do = 1; |
549 | break; | 543 | break; |
550 | } | 544 | } |
551 | 545 | ||
552 | pending_req = alloc_req(); | 546 | pending_req = alloc_req(); |
553 | if (NULL == pending_req) { | 547 | if (NULL == pending_req) { |
554 | blkif->st_oo_req++; | 548 | blkif->st_oo_req++; |
555 | more_to_do = 1; | 549 | more_to_do = 1; |
556 | break; | 550 | break; |
557 | } | 551 | } |
558 | 552 | ||
559 | switch (blkif->blk_protocol) { | 553 | switch (blkif->blk_protocol) { |
560 | case BLKIF_PROTOCOL_NATIVE: | 554 | case BLKIF_PROTOCOL_NATIVE: |
561 | memcpy(&req, RING_GET_REQUEST(&blk_rings->native, rc), sizeof(req)); | 555 | memcpy(&req, RING_GET_REQUEST(&blk_rings->native, rc), sizeof(req)); |
562 | break; | 556 | break; |
563 | case BLKIF_PROTOCOL_X86_32: | 557 | case BLKIF_PROTOCOL_X86_32: |
564 | blkif_get_x86_32_req(&req, RING_GET_REQUEST(&blk_rings->x86_32, rc)); | 558 | blkif_get_x86_32_req(&req, RING_GET_REQUEST(&blk_rings->x86_32, rc)); |
565 | break; | 559 | break; |
566 | case BLKIF_PROTOCOL_X86_64: | 560 | case BLKIF_PROTOCOL_X86_64: |
567 | blkif_get_x86_64_req(&req, RING_GET_REQUEST(&blk_rings->x86_64, rc)); | 561 | blkif_get_x86_64_req(&req, RING_GET_REQUEST(&blk_rings->x86_64, rc)); |
568 | break; | 562 | break; |
569 | default: | 563 | default: |
570 | BUG(); | 564 | BUG(); |
571 | } | 565 | } |
572 | blk_rings->common.req_cons = ++rc; /* before make_response() */ | 566 | blk_rings->common.req_cons = ++rc; /* before make_response() */ |
573 | 567 | ||
574 | /* Apply all sanity checks to /private copy/ of request. */ | 568 | /* Apply all sanity checks to /private copy/ of request. */ |
575 | barrier(); | 569 | barrier(); |
576 | 570 | if (unlikely(req.operation == BLKIF_OP_DISCARD)) { | |
577 | if (dispatch_rw_block_io(blkif, &req, pending_req)) | 571 | free_req(pending_req); |
572 | if (dispatch_discard_io(blkif, &req)) | ||
573 | break; | ||
574 | } else if (dispatch_rw_block_io(blkif, &req, pending_req)) | ||
578 | break; | 575 | break; |
579 | 576 | ||
580 | /* Yield point for this unbounded loop. */ | 577 | /* Yield point for this unbounded loop. */ |
581 | cond_resched(); | 578 | cond_resched(); |
582 | } | 579 | } |
583 | 580 | ||
584 | return more_to_do; | 581 | return more_to_do; |
585 | } | 582 | } |
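
The rc/rp walk above is the consumer half of a single-producer ring: read the producer index once, order that read with rmb(), then advance the private req_cons up to it, copying each entry out of shared memory before trusting it. A stripped-down userspace sketch under those assumptions (overflow accounting omitted, all names hypothetical), with a C11 acquire load standing in for the barrier:

    #include <stdatomic.h>
    #include <stdint.h>
    #include <stdio.h>

    #define RING_SIZE 32u              /* power of two, like the Xen rings */

    struct demo_ring {
        _Atomic uint32_t req_prod;     /* advanced by the producer */
        uint32_t req_cons;             /* private to the consumer */
        uint64_t slot[RING_SIZE];
    };

    /* Entries written before the producer advanced req_prod are
     * guaranteed visible after the acquire load; each one is copied
     * into a private variable before use. */
    static void drain(struct demo_ring *r)
    {
        uint32_t rp = atomic_load_explicit(&r->req_prod,
                                           memory_order_acquire);

        while (r->req_cons != rp) {
            uint64_t req = r->slot[r->req_cons % RING_SIZE];
            r->req_cons++;             /* consume, then dispatch */
            printf("request %llu\n", (unsigned long long)req);
        }
    }

    int main(void)
    {
        struct demo_ring r;

        atomic_init(&r.req_prod, 0);
        r.req_cons = 0;
        r.slot[0] = 42;
        atomic_store_explicit(&r.req_prod, 1, memory_order_release);
        drain(&r);
        return 0;
    }
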
586 | 583 | ||
587 | static int | 584 | static int |
588 | do_block_io_op(struct xen_blkif *blkif) | 585 | do_block_io_op(struct xen_blkif *blkif) |
589 | { | 586 | { |
590 | union blkif_back_rings *blk_rings = &blkif->blk_rings; | 587 | union blkif_back_rings *blk_rings = &blkif->blk_rings; |
591 | int more_to_do; | 588 | int more_to_do; |
592 | 589 | ||
593 | do { | 590 | do { |
594 | more_to_do = __do_block_io_op(blkif); | 591 | more_to_do = __do_block_io_op(blkif); |
595 | if (more_to_do) | 592 | if (more_to_do) |
596 | break; | 593 | break; |
597 | 594 | ||
598 | RING_FINAL_CHECK_FOR_REQUESTS(&blk_rings->common, more_to_do); | 595 | RING_FINAL_CHECK_FOR_REQUESTS(&blk_rings->common, more_to_do); |
599 | } while (more_to_do); | 596 | } while (more_to_do); |
600 | 597 | ||
601 | return more_to_do; | 598 | return more_to_do; |
602 | } | 599 | } |
603 | /* | 600 | /* |
604 | * Transmute the 'struct blkif_request' into a proper 'struct bio' | 601 | * Transmute the 'struct blkif_request' into a proper 'struct bio' |
605 | * and call 'submit_bio' to pass it to the underlying storage. | 602 | * and call 'submit_bio' to pass it to the underlying storage. |
606 | */ | 603 | */ |
607 | static int dispatch_rw_block_io(struct xen_blkif *blkif, | 604 | static int dispatch_rw_block_io(struct xen_blkif *blkif, |
608 | struct blkif_request *req, | 605 | struct blkif_request *req, |
609 | struct pending_req *pending_req) | 606 | struct pending_req *pending_req) |
610 | { | 607 | { |
611 | struct phys_req preq; | 608 | struct phys_req preq; |
612 | struct seg_buf seg[BLKIF_MAX_SEGMENTS_PER_REQUEST]; | 609 | struct seg_buf seg[BLKIF_MAX_SEGMENTS_PER_REQUEST]; |
613 | unsigned int nseg; | 610 | unsigned int nseg; |
614 | struct bio *bio = NULL; | 611 | struct bio *bio = NULL; |
615 | struct bio *biolist[BLKIF_MAX_SEGMENTS_PER_REQUEST]; | 612 | struct bio *biolist[BLKIF_MAX_SEGMENTS_PER_REQUEST]; |
616 | int i, nbio = 0; | 613 | int i, nbio = 0; |
617 | int operation; | 614 | int operation; |
618 | struct blk_plug plug; | 615 | struct blk_plug plug; |
619 | bool drain = false; | 616 | bool drain = false; |
620 | 617 | ||
621 | switch (req->operation) { | 618 | switch (req->operation) { |
622 | case BLKIF_OP_READ: | 619 | case BLKIF_OP_READ: |
623 | blkif->st_rd_req++; | 620 | blkif->st_rd_req++; |
624 | operation = READ; | 621 | operation = READ; |
625 | break; | 622 | break; |
626 | case BLKIF_OP_WRITE: | 623 | case BLKIF_OP_WRITE: |
627 | blkif->st_wr_req++; | 624 | blkif->st_wr_req++; |
628 | operation = WRITE_ODIRECT; | 625 | operation = WRITE_ODIRECT; |
629 | break; | 626 | break; |
630 | case BLKIF_OP_WRITE_BARRIER: | 627 | case BLKIF_OP_WRITE_BARRIER: |
631 | drain = true; | 628 | drain = true; |
632 | case BLKIF_OP_FLUSH_DISKCACHE: | 629 | case BLKIF_OP_FLUSH_DISKCACHE: |
633 | blkif->st_f_req++; | 630 | blkif->st_f_req++; |
634 | operation = WRITE_FLUSH; | 631 | operation = WRITE_FLUSH; |
635 | break; | 632 | break; |
636 | case BLKIF_OP_DISCARD: | ||
637 | blkif->st_ds_req++; | ||
638 | operation = REQ_DISCARD; | ||
639 | break; | ||
640 | default: | 633 | default: |
641 | operation = 0; /* make gcc happy */ | 634 | operation = 0; /* make gcc happy */ |
642 | goto fail_response; | 635 | goto fail_response; |
643 | break; | 636 | break; |
644 | } | 637 | } |
645 | 638 | ||
646 | /* Check that the number of segments is sane. */ | 639 | /* Check that the number of segments is sane. */ |
647 | nseg = req->nr_segments; | 640 | nseg = req->u.rw.nr_segments; |
648 | if (unlikely(nseg == 0 && operation != WRITE_FLUSH && | 641 | |
649 | operation != REQ_DISCARD) || | 642 | if (unlikely(nseg == 0 && operation != WRITE_FLUSH) || |
650 | unlikely(nseg > BLKIF_MAX_SEGMENTS_PER_REQUEST)) { | 643 | unlikely(nseg > BLKIF_MAX_SEGMENTS_PER_REQUEST)) { |
651 | pr_debug(DRV_PFX "Bad number of segments in request (%d)\n", | 644 | pr_debug(DRV_PFX "Bad number of segments in request (%d)\n", |
652 | nseg); | 645 | nseg); |
653 | /* Haven't submitted any bios yet. */ | 646 | /* Haven't submitted any bios yet. */ |
654 | goto fail_response; | 647 | goto fail_response; |
655 | } | 648 | } |
656 | 649 | ||
657 | preq.dev = req->handle; | 650 | preq.dev = req->u.rw.handle; |
658 | preq.sector_number = req->u.rw.sector_number; | 651 | preq.sector_number = req->u.rw.sector_number; |
659 | preq.nr_sects = 0; | 652 | preq.nr_sects = 0; |
660 | 653 | ||
661 | pending_req->blkif = blkif; | 654 | pending_req->blkif = blkif; |
662 | pending_req->id = req->id; | 655 | pending_req->id = req->u.rw.id; |
663 | pending_req->operation = req->operation; | 656 | pending_req->operation = req->operation; |
664 | pending_req->status = BLKIF_RSP_OKAY; | 657 | pending_req->status = BLKIF_RSP_OKAY; |
665 | pending_req->nr_pages = nseg; | 658 | pending_req->nr_pages = nseg; |
666 | 659 | ||
667 | for (i = 0; i < nseg; i++) { | 660 | for (i = 0; i < nseg; i++) { |
668 | seg[i].nsec = req->u.rw.seg[i].last_sect - | 661 | seg[i].nsec = req->u.rw.seg[i].last_sect - |
669 | req->u.rw.seg[i].first_sect + 1; | 662 | req->u.rw.seg[i].first_sect + 1; |
670 | if ((req->u.rw.seg[i].last_sect >= (PAGE_SIZE >> 9)) || | 663 | if ((req->u.rw.seg[i].last_sect >= (PAGE_SIZE >> 9)) || |
671 | (req->u.rw.seg[i].last_sect < req->u.rw.seg[i].first_sect)) | 664 | (req->u.rw.seg[i].last_sect < req->u.rw.seg[i].first_sect)) |
672 | goto fail_response; | 665 | goto fail_response; |
673 | preq.nr_sects += seg[i].nsec; | 666 | preq.nr_sects += seg[i].nsec; |
674 | 667 | ||
675 | } | 668 | } |
676 | 669 | ||
677 | if (xen_vbd_translate(&preq, blkif, operation) != 0) { | 670 | if (xen_vbd_translate(&preq, blkif, operation) != 0) { |
678 | pr_debug(DRV_PFX "access denied: %s of [%llu,%llu] on dev=%04x\n", | 671 | pr_debug(DRV_PFX "access denied: %s of [%llu,%llu] on dev=%04x\n", |
679 | operation == READ ? "read" : "write", | 672 | operation == READ ? "read" : "write", |
680 | preq.sector_number, | 673 | preq.sector_number, |
681 | preq.sector_number + preq.nr_sects, preq.dev); | 674 | preq.sector_number + preq.nr_sects, preq.dev); |
682 | goto fail_response; | 675 | goto fail_response; |
683 | } | 676 | } |
684 | 677 | ||
685 | /* | 678 | /* |
686 | * This check _MUST_ be done after xen_vbd_translate as the preq.bdev | 679 | * This check _MUST_ be done after xen_vbd_translate as the preq.bdev |
687 | * is set there. | 680 | * is set there. |
688 | */ | 681 | */ |
689 | for (i = 0; i < nseg; i++) { | 682 | for (i = 0; i < nseg; i++) { |
690 | if (((int)preq.sector_number|(int)seg[i].nsec) & | 683 | if (((int)preq.sector_number|(int)seg[i].nsec) & |
691 | ((bdev_logical_block_size(preq.bdev) >> 9) - 1)) { | 684 | ((bdev_logical_block_size(preq.bdev) >> 9) - 1)) { |
692 | pr_debug(DRV_PFX "Misaligned I/O request from domain %d", | 685 | pr_debug(DRV_PFX "Misaligned I/O request from domain %d", |
693 | blkif->domid); | 686 | blkif->domid); |
694 | goto fail_response; | 687 | goto fail_response; |
695 | } | 688 | } |
696 | } | 689 | } |
697 | 690 | ||
698 | /* Wait on all outstanding I/Os and, once they have completed, | 691 | /* Wait on all outstanding I/Os and, once they have completed, |
699 | * issue the WRITE_FLUSH. | 692 | * issue the WRITE_FLUSH. |
700 | */ | 693 | */ |
701 | if (drain) | 694 | if (drain) |
702 | xen_blk_drain_io(pending_req->blkif); | 695 | xen_blk_drain_io(pending_req->blkif); |
703 | 696 | ||
704 | /* | 697 | /* |
705 | * If we have failed at this point, we need to undo the M2P override, | 698 | * If we have failed at this point, we need to undo the M2P override, |
706 | * set gnttab_set_unmap_op on all of the grant references and perform | 699 | * set gnttab_set_unmap_op on all of the grant references and perform |
707 | * the hypercall to unmap the grants - that is all done in | 700 | * the hypercall to unmap the grants - that is all done in |
708 | * xen_blkbk_unmap. | 701 | * xen_blkbk_unmap. |
709 | */ | 702 | */ |
710 | if (operation != REQ_DISCARD && xen_blkbk_map(req, pending_req, seg)) | 703 | if (xen_blkbk_map(req, pending_req, seg)) |
711 | goto fail_flush; | 704 | goto fail_flush; |
712 | 705 | ||
713 | /* | 706 | /* |
714 | * The corresponding xen_blkif_put is done in __end_block_io_op, or | 707 | * The corresponding xen_blkif_put is done in __end_block_io_op. |
715 | * below (in "!bio") if we are handling a BLKIF_OP_DISCARD. | 708 | * (Discard requests are now handled in dispatch_discard_io.) |
716 | */ | 709 | */ |
717 | xen_blkif_get(blkif); | 710 | xen_blkif_get(blkif); |
718 | 711 | ||
719 | for (i = 0; i < nseg; i++) { | 712 | for (i = 0; i < nseg; i++) { |
720 | while ((bio == NULL) || | 713 | while ((bio == NULL) || |
721 | (bio_add_page(bio, | 714 | (bio_add_page(bio, |
722 | blkbk->pending_page(pending_req, i), | 715 | blkbk->pending_page(pending_req, i), |
723 | seg[i].nsec << 9, | 716 | seg[i].nsec << 9, |
724 | seg[i].buf & ~PAGE_MASK) == 0)) { | 717 | seg[i].buf & ~PAGE_MASK) == 0)) { |
725 | 718 | ||
726 | bio = bio_alloc(GFP_KERNEL, nseg-i); | 719 | bio = bio_alloc(GFP_KERNEL, nseg-i); |
727 | if (unlikely(bio == NULL)) | 720 | if (unlikely(bio == NULL)) |
728 | goto fail_put_bio; | 721 | goto fail_put_bio; |
729 | 722 | ||
730 | biolist[nbio++] = bio; | 723 | biolist[nbio++] = bio; |
731 | bio->bi_bdev = preq.bdev; | 724 | bio->bi_bdev = preq.bdev; |
732 | bio->bi_private = pending_req; | 725 | bio->bi_private = pending_req; |
733 | bio->bi_end_io = end_block_io_op; | 726 | bio->bi_end_io = end_block_io_op; |
734 | bio->bi_sector = preq.sector_number; | 727 | bio->bi_sector = preq.sector_number; |
735 | } | 728 | } |
736 | 729 | ||
737 | preq.sector_number += seg[i].nsec; | 730 | preq.sector_number += seg[i].nsec; |
738 | } | 731 | } |
739 | 732 | ||
740 | /* This will be hit if the operation was a flush or discard. */ | 733 | /* This will be hit if the operation was a flush. */ |
741 | if (!bio) { | 734 | if (!bio) { |
742 | BUG_ON(operation != WRITE_FLUSH && operation != REQ_DISCARD); | 735 | BUG_ON(operation != WRITE_FLUSH); |
743 | 736 | ||
744 | if (operation == WRITE_FLUSH) { | 737 | bio = bio_alloc(GFP_KERNEL, 0); |
745 | bio = bio_alloc(GFP_KERNEL, 0); | 738 | if (unlikely(bio == NULL)) |
746 | if (unlikely(bio == NULL)) | 739 | goto fail_put_bio; |
747 | goto fail_put_bio; | ||
748 | 740 | ||
749 | biolist[nbio++] = bio; | 741 | biolist[nbio++] = bio; |
750 | bio->bi_bdev = preq.bdev; | 742 | bio->bi_bdev = preq.bdev; |
751 | bio->bi_private = pending_req; | 743 | bio->bi_private = pending_req; |
752 | bio->bi_end_io = end_block_io_op; | 744 | bio->bi_end_io = end_block_io_op; |
753 | } else if (operation == REQ_DISCARD) { | ||
754 | xen_blk_discard(blkif, req); | ||
755 | xen_blkif_put(blkif); | ||
756 | free_req(pending_req); | ||
757 | return 0; | ||
758 | } | ||
759 | } | 745 | } |
760 | 746 | ||
761 | /* | 747 | /* |
762 | * We set it to nbio up front so that the completion path only ever | 748 | * We set it to nbio up front so that the completion path only ever |
763 | * decrements it and never has to call atomic_inc. | 749 | * decrements it and never has to call atomic_inc. |
764 | */ | 750 | */ |
765 | atomic_set(&pending_req->pendcnt, nbio); | 751 | atomic_set(&pending_req->pendcnt, nbio); |
766 | 752 | ||
767 | /* Get a reference count for the disk queue and start sending I/O */ | 753 | /* Get a reference count for the disk queue and start sending I/O */ |
768 | blk_start_plug(&plug); | 754 | blk_start_plug(&plug); |
769 | 755 | ||
770 | for (i = 0; i < nbio; i++) | 756 | for (i = 0; i < nbio; i++) |
771 | submit_bio(operation, biolist[i]); | 757 | submit_bio(operation, biolist[i]); |
772 | 758 | ||
773 | /* Let the I/Os go.. */ | 759 | /* Let the I/Os go.. */ |
774 | blk_finish_plug(&plug); | 760 | blk_finish_plug(&plug); |
775 | 761 | ||
776 | if (operation == READ) | 762 | if (operation == READ) |
777 | blkif->st_rd_sect += preq.nr_sects; | 763 | blkif->st_rd_sect += preq.nr_sects; |
778 | else if (operation & WRITE) | 764 | else if (operation & WRITE) |
779 | blkif->st_wr_sect += preq.nr_sects; | 765 | blkif->st_wr_sect += preq.nr_sects; |
780 | 766 | ||
781 | return 0; | 767 | return 0; |
782 | 768 | ||
783 | fail_flush: | 769 | fail_flush: |
784 | xen_blkbk_unmap(pending_req); | 770 | xen_blkbk_unmap(pending_req); |
785 | fail_response: | 771 | fail_response: |
786 | /* Haven't submitted any bios yet. */ | 772 | /* Haven't submitted any bios yet. */ |
787 | make_response(blkif, req->id, req->operation, BLKIF_RSP_ERROR); | 773 | make_response(blkif, req->u.rw.id, req->operation, BLKIF_RSP_ERROR); |
788 | free_req(pending_req); | 774 | free_req(pending_req); |
789 | msleep(1); /* back off a bit */ | 775 | msleep(1); /* back off a bit */ |
790 | return -EIO; | 776 | return -EIO; |
791 | 777 | ||
792 | fail_put_bio: | 778 | fail_put_bio: |
793 | for (i = 0; i < nbio; i++) | 779 | for (i = 0; i < nbio; i++) |
794 | bio_put(biolist[i]); | 780 | bio_put(biolist[i]); |
795 | __end_block_io_op(pending_req, -EINVAL); | 781 | __end_block_io_op(pending_req, -EINVAL); |
796 | msleep(1); /* back off a bit */ | 782 | msleep(1); /* back off a bit */ |
797 | return -EIO; | 783 | return -EIO; |
798 | } | 784 | } |
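
The per-segment alignment test in dispatch_rw_block_io() packs two checks into one expression: with a power-of-two logical block size, OR-ing the start sector and the segment length and masking with (sectors-per-block - 1) is non-zero exactly when either value is unaligned. A self-contained sketch of that arithmetic (assuming, as the kernel does, a power-of-two block size):

    #include <stdio.h>

    static int misaligned(unsigned long long sector, unsigned int nsec,
                          unsigned int lbs_bytes)
    {
        /* sectors per logical block, minus one, as a bit mask */
        unsigned int mask = (lbs_bytes >> 9) - 1;

        return ((sector | nsec) & mask) != 0;
    }

    int main(void)
    {
        /* 4096-byte logical blocks = 8 sectors per block */
        printf("%d\n", misaligned(7, 8, 4096));   /* 1: bad start sector */
        printf("%d\n", misaligned(8, 8, 4096));   /* 0: aligned */
        printf("%d\n", misaligned(8, 3, 4096));   /* 1: partial block */
        return 0;
    }
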
799 | 785 | ||
800 | 786 | ||
801 | 787 | ||
802 | /* | 788 | /* |
803 | * Put a response on the ring saying how the operation fared. | 789 | * Put a response on the ring saying how the operation fared. |
804 | */ | 790 | */ |
805 | static void make_response(struct xen_blkif *blkif, u64 id, | 791 | static void make_response(struct xen_blkif *blkif, u64 id, |
806 | unsigned short op, int st) | 792 | unsigned short op, int st) |
807 | { | 793 | { |
808 | struct blkif_response resp; | 794 | struct blkif_response resp; |
809 | unsigned long flags; | 795 | unsigned long flags; |
810 | union blkif_back_rings *blk_rings = &blkif->blk_rings; | 796 | union blkif_back_rings *blk_rings = &blkif->blk_rings; |
811 | int notify; | 797 | int notify; |
812 | 798 | ||
813 | resp.id = id; | 799 | resp.id = id; |
814 | resp.operation = op; | 800 | resp.operation = op; |
815 | resp.status = st; | 801 | resp.status = st; |
816 | 802 | ||
817 | spin_lock_irqsave(&blkif->blk_ring_lock, flags); | 803 | spin_lock_irqsave(&blkif->blk_ring_lock, flags); |
818 | /* Place on the response ring for the relevant domain. */ | 804 | /* Place on the response ring for the relevant domain. */ |
819 | switch (blkif->blk_protocol) { | 805 | switch (blkif->blk_protocol) { |
820 | case BLKIF_PROTOCOL_NATIVE: | 806 | case BLKIF_PROTOCOL_NATIVE: |
821 | memcpy(RING_GET_RESPONSE(&blk_rings->native, blk_rings->native.rsp_prod_pvt), | 807 | memcpy(RING_GET_RESPONSE(&blk_rings->native, blk_rings->native.rsp_prod_pvt), |
822 | &resp, sizeof(resp)); | 808 | &resp, sizeof(resp)); |
823 | break; | 809 | break; |
824 | case BLKIF_PROTOCOL_X86_32: | 810 | case BLKIF_PROTOCOL_X86_32: |
825 | memcpy(RING_GET_RESPONSE(&blk_rings->x86_32, blk_rings->x86_32.rsp_prod_pvt), | 811 | memcpy(RING_GET_RESPONSE(&blk_rings->x86_32, blk_rings->x86_32.rsp_prod_pvt), |
826 | &resp, sizeof(resp)); | 812 | &resp, sizeof(resp)); |
827 | break; | 813 | break; |
828 | case BLKIF_PROTOCOL_X86_64: | 814 | case BLKIF_PROTOCOL_X86_64: |
829 | memcpy(RING_GET_RESPONSE(&blk_rings->x86_64, blk_rings->x86_64.rsp_prod_pvt), | 815 | memcpy(RING_GET_RESPONSE(&blk_rings->x86_64, blk_rings->x86_64.rsp_prod_pvt), |
830 | &resp, sizeof(resp)); | 816 | &resp, sizeof(resp)); |
831 | break; | 817 | break; |
832 | default: | 818 | default: |
833 | BUG(); | 819 | BUG(); |
834 | } | 820 | } |
835 | blk_rings->common.rsp_prod_pvt++; | 821 | blk_rings->common.rsp_prod_pvt++; |
836 | RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&blk_rings->common, notify); | 822 | RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&blk_rings->common, notify); |
837 | spin_unlock_irqrestore(&blkif->blk_ring_lock, flags); | 823 | spin_unlock_irqrestore(&blkif->blk_ring_lock, flags); |
838 | if (notify) | 824 | if (notify) |
839 | notify_remote_via_irq(blkif->irq); | 825 | notify_remote_via_irq(blkif->irq); |
840 | } | 826 | } |
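
make_response() is the producer half of the same ring discipline: write the response into the slot named by the private rsp_prod_pvt, then publish the index so the frontend can observe it, notifying over the event channel only when the peer asked to be woken. A userspace sketch of the publish step (names hypothetical; the notify decision is only noted in a comment):

    #include <stdatomic.h>
    #include <stdint.h>

    #define RING_SIZE 32u

    struct resp_ring {
        _Atomic uint32_t rsp_prod;     /* published to the peer */
        uint32_t rsp_prod_pvt;         /* producer-private shadow */
        uint64_t slot[RING_SIZE];
    };

    /* Queue privately, then publish with a release store: a peer that
     * observes the new rsp_prod is guaranteed to see the response body.
     * This stands in for rsp_prod_pvt++ followed by
     * RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(); a real backend would also
     * decide here whether the event channel needs a kick. */
    static void push_response(struct resp_ring *r, uint64_t resp)
    {
        r->slot[r->rsp_prod_pvt % RING_SIZE] = resp;
        r->rsp_prod_pvt++;
        atomic_store_explicit(&r->rsp_prod, r->rsp_prod_pvt,
                              memory_order_release);
    }

    int main(void)
    {
        struct resp_ring r;

        atomic_init(&r.rsp_prod, 0);
        r.rsp_prod_pvt = 0;
        push_response(&r, 7);
        return 0;
    }
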
841 | 827 | ||
842 | static int __init xen_blkif_init(void) | 828 | static int __init xen_blkif_init(void) |
843 | { | 829 | { |
844 | int i, mmap_pages; | 830 | int i, mmap_pages; |
845 | int rc = 0; | 831 | int rc = 0; |
846 | 832 | ||
847 | if (!xen_pv_domain()) | 833 | if (!xen_pv_domain()) |
848 | return -ENODEV; | 834 | return -ENODEV; |
849 | 835 | ||
850 | blkbk = kzalloc(sizeof(struct xen_blkbk), GFP_KERNEL); | 836 | blkbk = kzalloc(sizeof(struct xen_blkbk), GFP_KERNEL); |
851 | if (!blkbk) { | 837 | if (!blkbk) { |
852 | pr_alert(DRV_PFX "%s: out of memory!\n", __func__); | 838 | pr_alert(DRV_PFX "%s: out of memory!\n", __func__); |
853 | return -ENOMEM; | 839 | return -ENOMEM; |
854 | } | 840 | } |
855 | 841 | ||
856 | mmap_pages = xen_blkif_reqs * BLKIF_MAX_SEGMENTS_PER_REQUEST; | 842 | mmap_pages = xen_blkif_reqs * BLKIF_MAX_SEGMENTS_PER_REQUEST; |
857 | 843 | ||
858 | blkbk->pending_reqs = kzalloc(sizeof(blkbk->pending_reqs[0]) * | 844 | blkbk->pending_reqs = kzalloc(sizeof(blkbk->pending_reqs[0]) * |
859 | xen_blkif_reqs, GFP_KERNEL); | 845 | xen_blkif_reqs, GFP_KERNEL); |
860 | blkbk->pending_grant_handles = kmalloc(sizeof(blkbk->pending_grant_handles[0]) * | 846 | blkbk->pending_grant_handles = kmalloc(sizeof(blkbk->pending_grant_handles[0]) * |
861 | mmap_pages, GFP_KERNEL); | 847 | mmap_pages, GFP_KERNEL); |
862 | blkbk->pending_pages = kzalloc(sizeof(blkbk->pending_pages[0]) * | 848 | blkbk->pending_pages = kzalloc(sizeof(blkbk->pending_pages[0]) * |
863 | mmap_pages, GFP_KERNEL); | 849 | mmap_pages, GFP_KERNEL); |
864 | 850 | ||
865 | if (!blkbk->pending_reqs || !blkbk->pending_grant_handles || | 851 | if (!blkbk->pending_reqs || !blkbk->pending_grant_handles || |
866 | !blkbk->pending_pages) { | 852 | !blkbk->pending_pages) { |
867 | rc = -ENOMEM; | 853 | rc = -ENOMEM; |
868 | goto out_of_memory; | 854 | goto out_of_memory; |
869 | } | 855 | } |
870 | 856 | ||
871 | for (i = 0; i < mmap_pages; i++) { | 857 | for (i = 0; i < mmap_pages; i++) { |
872 | blkbk->pending_grant_handles[i] = BLKBACK_INVALID_HANDLE; | 858 | blkbk->pending_grant_handles[i] = BLKBACK_INVALID_HANDLE; |
873 | blkbk->pending_pages[i] = alloc_page(GFP_KERNEL); | 859 | blkbk->pending_pages[i] = alloc_page(GFP_KERNEL); |
874 | if (blkbk->pending_pages[i] == NULL) { | 860 | if (blkbk->pending_pages[i] == NULL) { |
875 | rc = -ENOMEM; | 861 | rc = -ENOMEM; |
876 | goto out_of_memory; | 862 | goto out_of_memory; |
877 | } | 863 | } |
878 | } | 864 | } |
879 | rc = xen_blkif_interface_init(); | 865 | rc = xen_blkif_interface_init(); |
880 | if (rc) | 866 | if (rc) |
881 | goto failed_init; | 867 | goto failed_init; |
882 | 868 | ||
883 | INIT_LIST_HEAD(&blkbk->pending_free); | 869 | INIT_LIST_HEAD(&blkbk->pending_free); |
884 | spin_lock_init(&blkbk->pending_free_lock); | 870 | spin_lock_init(&blkbk->pending_free_lock); |
885 | init_waitqueue_head(&blkbk->pending_free_wq); | 871 | init_waitqueue_head(&blkbk->pending_free_wq); |
886 | 872 | ||
887 | for (i = 0; i < xen_blkif_reqs; i++) | 873 | for (i = 0; i < xen_blkif_reqs; i++) |
888 | list_add_tail(&blkbk->pending_reqs[i].free_list, | 874 | list_add_tail(&blkbk->pending_reqs[i].free_list, |
889 | &blkbk->pending_free); | 875 | &blkbk->pending_free); |
890 | 876 | ||
891 | rc = xen_blkif_xenbus_init(); | 877 | rc = xen_blkif_xenbus_init(); |
892 | if (rc) | 878 | if (rc) |
893 | goto failed_init; | 879 | goto failed_init; |
894 | 880 | ||
895 | return 0; | 881 | return 0; |
896 | 882 | ||
897 | out_of_memory: | 883 | out_of_memory: |
898 | pr_alert(DRV_PFX "%s: out of memory\n", __func__); | 884 | pr_alert(DRV_PFX "%s: out of memory\n", __func__); |
899 | failed_init: | 885 | failed_init: |
900 | kfree(blkbk->pending_reqs); | 886 | kfree(blkbk->pending_reqs); |
901 | kfree(blkbk->pending_grant_handles); | 887 | kfree(blkbk->pending_grant_handles); |
902 | if (blkbk->pending_pages) { | 888 | if (blkbk->pending_pages) { |
903 | for (i = 0; i < mmap_pages; i++) { | 889 | for (i = 0; i < mmap_pages; i++) { |
904 | if (blkbk->pending_pages[i]) | 890 | if (blkbk->pending_pages[i]) |
905 | __free_page(blkbk->pending_pages[i]); | 891 | __free_page(blkbk->pending_pages[i]); |
906 | } | 892 | } |
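
One pattern worth flagging in xen_blkif_init(): the pending_* tables are sized as sizeof(element) * count inside a single kzalloc()/kmalloc() call. If the count were ever untrusted, that multiplication could overflow and silently yield a short buffer; the overflow-checked spellings are kcalloc() in the kernel and calloc() in userspace. A small illustration of the difference (function names hypothetical):

    #include <stdlib.h>

    /* Unsafe spelling: n * entry can wrap, yielding a short buffer. */
    void *alloc_table_unchecked(size_t n, size_t entry)
    {
        return malloc(n * entry);
    }

    /* Safe spelling: calloc() (like the kernel's kcalloc()) returns
     * NULL if n * entry would overflow, and zeroes the memory. */
    void *alloc_table_checked(size_t n, size_t entry)
    {
        return calloc(n, entry);
    }
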
drivers/block/xen-blkback/common.h
1 | /* | 1 | /* |
2 | * This program is free software; you can redistribute it and/or | 2 | * This program is free software; you can redistribute it and/or |
3 | * modify it under the terms of the GNU General Public License version 2 | 3 | * modify it under the terms of the GNU General Public License version 2 |
4 | * as published by the Free Software Foundation; or, when distributed | 4 | * as published by the Free Software Foundation; or, when distributed |
5 | * separately from the Linux kernel or incorporated into other | 5 | * separately from the Linux kernel or incorporated into other |
6 | * software packages, subject to the following license: | 6 | * software packages, subject to the following license: |
7 | * | 7 | * |
8 | * Permission is hereby granted, free of charge, to any person obtaining a copy | 8 | * Permission is hereby granted, free of charge, to any person obtaining a copy |
9 | * of this source file (the "Software"), to deal in the Software without | 9 | * of this source file (the "Software"), to deal in the Software without |
10 | * restriction, including without limitation the rights to use, copy, modify, | 10 | * restriction, including without limitation the rights to use, copy, modify, |
11 | * merge, publish, distribute, sublicense, and/or sell copies of the Software, | 11 | * merge, publish, distribute, sublicense, and/or sell copies of the Software, |
12 | * and to permit persons to whom the Software is furnished to do so, subject to | 12 | * and to permit persons to whom the Software is furnished to do so, subject to |
13 | * the following conditions: | 13 | * the following conditions: |
14 | * | 14 | * |
15 | * The above copyright notice and this permission notice shall be included in | 15 | * The above copyright notice and this permission notice shall be included in |
16 | * all copies or substantial portions of the Software. | 16 | * all copies or substantial portions of the Software. |
17 | * | 17 | * |
18 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | 18 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
19 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | 19 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
20 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | 20 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
21 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | 21 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
22 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | 22 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING |
23 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS | 23 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS |
24 | * IN THE SOFTWARE. | 24 | * IN THE SOFTWARE. |
25 | */ | 25 | */ |
26 | 26 | ||
27 | #ifndef __XEN_BLKIF__BACKEND__COMMON_H__ | 27 | #ifndef __XEN_BLKIF__BACKEND__COMMON_H__ |
28 | #define __XEN_BLKIF__BACKEND__COMMON_H__ | 28 | #define __XEN_BLKIF__BACKEND__COMMON_H__ |
29 | 29 | ||
30 | #include <linux/module.h> | 30 | #include <linux/module.h> |
31 | #include <linux/interrupt.h> | 31 | #include <linux/interrupt.h> |
32 | #include <linux/slab.h> | 32 | #include <linux/slab.h> |
33 | #include <linux/blkdev.h> | 33 | #include <linux/blkdev.h> |
34 | #include <linux/vmalloc.h> | 34 | #include <linux/vmalloc.h> |
35 | #include <linux/wait.h> | 35 | #include <linux/wait.h> |
36 | #include <linux/io.h> | 36 | #include <linux/io.h> |
37 | #include <asm/setup.h> | 37 | #include <asm/setup.h> |
38 | #include <asm/pgalloc.h> | 38 | #include <asm/pgalloc.h> |
39 | #include <asm/hypervisor.h> | 39 | #include <asm/hypervisor.h> |
40 | #include <xen/grant_table.h> | 40 | #include <xen/grant_table.h> |
41 | #include <xen/xenbus.h> | 41 | #include <xen/xenbus.h> |
42 | #include <xen/interface/io/ring.h> | 42 | #include <xen/interface/io/ring.h> |
43 | #include <xen/interface/io/blkif.h> | 43 | #include <xen/interface/io/blkif.h> |
44 | #include <xen/interface/io/protocols.h> | 44 | #include <xen/interface/io/protocols.h> |
45 | 45 | ||
46 | #define DRV_PFX "xen-blkback:" | 46 | #define DRV_PFX "xen-blkback:" |
47 | #define DPRINTK(fmt, args...) \ | 47 | #define DPRINTK(fmt, args...) \ |
48 | pr_debug(DRV_PFX "(%s:%d) " fmt ".\n", \ | 48 | pr_debug(DRV_PFX "(%s:%d) " fmt ".\n", \ |
49 | __func__, __LINE__, ##args) | 49 | __func__, __LINE__, ##args) |
50 | 50 | ||
51 | 51 | ||
52 | /* Not a real protocol. Used to generate ring structs which contain | 52 | /* Not a real protocol. Used to generate ring structs which contain |
53 | * the elements common to all protocols only. This way we get a | 53 | * the elements common to all protocols only. This way we get a |
54 | * compiler-checkable way to use common struct elements, so we can | 54 | * compiler-checkable way to use common struct elements, so we can |
55 | * avoid using switch(protocol) in a number of places. */ | 55 | * avoid using switch(protocol) in a number of places. */ |
56 | struct blkif_common_request { | 56 | struct blkif_common_request { |
57 | char dummy; | 57 | char dummy; |
58 | }; | 58 | }; |
59 | struct blkif_common_response { | 59 | struct blkif_common_response { |
60 | char dummy; | 60 | char dummy; |
61 | }; | 61 | }; |
62 | 62 | ||
63 | /* i386 protocol version */ | ||
64 | #pragma pack(push, 4) | ||
65 | |||
66 | struct blkif_x86_32_request_rw { | 63 | struct blkif_x86_32_request_rw { |
64 | uint8_t nr_segments; /* number of segments */ | ||
65 | blkif_vdev_t handle; /* only for read/write requests */ | ||
66 | uint64_t id; /* private guest value, echoed in resp */ | ||
67 | blkif_sector_t sector_number;/* start sector idx on disk (r/w only) */ | 67 | blkif_sector_t sector_number;/* start sector idx on disk (r/w only) */ |
68 | struct blkif_request_segment seg[BLKIF_MAX_SEGMENTS_PER_REQUEST]; | 68 | struct blkif_request_segment seg[BLKIF_MAX_SEGMENTS_PER_REQUEST]; |
69 | }; | 69 | } __attribute__((__packed__)); |
70 | 70 | ||
71 | struct blkif_x86_32_request_discard { | 71 | struct blkif_x86_32_request_discard { |
72 | uint8_t flag; /* BLKIF_DISCARD_SECURE or zero */ | ||
73 | blkif_vdev_t _pad1; /* was "handle" for read/write requests */ | ||
74 | uint64_t id; /* private guest value, echoed in resp */ | ||
72 | blkif_sector_t sector_number;/* start sector idx on disk */ | 75 | blkif_sector_t sector_number;/* start sector idx on disk */ |
73 | uint64_t nr_sectors; | 76 | uint64_t nr_sectors; |
74 | }; | 77 | } __attribute__((__packed__)); |
75 | 78 | ||
76 | struct blkif_x86_32_request { | 79 | struct blkif_x86_32_request { |
77 | uint8_t operation; /* BLKIF_OP_??? */ | 80 | uint8_t operation; /* BLKIF_OP_??? */ |
78 | uint8_t nr_segments; /* number of segments */ | ||
79 | blkif_vdev_t handle; /* only for read/write requests */ | ||
80 | uint64_t id; /* private guest value, echoed in resp */ | ||
81 | union { | 81 | union { |
82 | struct blkif_x86_32_request_rw rw; | 82 | struct blkif_x86_32_request_rw rw; |
83 | struct blkif_x86_32_request_discard discard; | 83 | struct blkif_x86_32_request_discard discard; |
84 | } u; | 84 | } u; |
85 | }; | 85 | } __attribute__((__packed__)); |
86 | |||
87 | /* i386 protocol version */ | ||
88 | #pragma pack(push, 4) | ||
86 | struct blkif_x86_32_response { | 89 | struct blkif_x86_32_response { |
87 | uint64_t id; /* copied from request */ | 90 | uint64_t id; /* copied from request */ |
88 | uint8_t operation; /* copied from request */ | 91 | uint8_t operation; /* copied from request */ |
89 | int16_t status; /* BLKIF_RSP_??? */ | 92 | int16_t status; /* BLKIF_RSP_??? */ |
90 | }; | 93 | }; |
91 | #pragma pack(pop) | 94 | #pragma pack(pop) |
92 | |||
93 | /* x86_64 protocol version */ | 95 | /* x86_64 protocol version */ |
94 | 96 | ||
95 | struct blkif_x86_64_request_rw { | 97 | struct blkif_x86_64_request_rw { |
98 | uint8_t nr_segments; /* number of segments */ | ||
99 | blkif_vdev_t handle; /* only for read/write requests */ | ||
100 | uint32_t _pad1; /* offsetof(blkif_request..,u.rw.id)==8 */ | ||
101 | uint64_t id; | ||
96 | blkif_sector_t sector_number;/* start sector idx on disk (r/w only) */ | 102 | blkif_sector_t sector_number;/* start sector idx on disk (r/w only) */ |
97 | struct blkif_request_segment seg[BLKIF_MAX_SEGMENTS_PER_REQUEST]; | 103 | struct blkif_request_segment seg[BLKIF_MAX_SEGMENTS_PER_REQUEST]; |
98 | }; | 104 | } __attribute__((__packed__)); |
99 | 105 | ||
100 | struct blkif_x86_64_request_discard { | 106 | struct blkif_x86_64_request_discard { |
107 | uint8_t flag; /* BLKIF_DISCARD_SECURE or zero */ | ||
108 | blkif_vdev_t _pad1; /* was "handle" for read/write requests */ | ||
109 | uint32_t _pad2; /* offsetof(blkif_..,u.discard.id)==8 */ | ||
110 | uint64_t id; | ||
101 | blkif_sector_t sector_number;/* start sector idx on disk */ | 111 | blkif_sector_t sector_number;/* start sector idx on disk */ |
102 | uint64_t nr_sectors; | 112 | uint64_t nr_sectors; |
103 | }; | 113 | } __attribute__((__packed__)); |
104 | 114 | ||
105 | struct blkif_x86_64_request { | 115 | struct blkif_x86_64_request { |
106 | uint8_t operation; /* BLKIF_OP_??? */ | 116 | uint8_t operation; /* BLKIF_OP_??? */ |
107 | uint8_t nr_segments; /* number of segments */ | ||
108 | blkif_vdev_t handle; /* only for read/write requests */ | ||
109 | uint64_t __attribute__((__aligned__(8))) id; | ||
110 | union { | 117 | union { |
111 | struct blkif_x86_64_request_rw rw; | 118 | struct blkif_x86_64_request_rw rw; |
112 | struct blkif_x86_64_request_discard discard; | 119 | struct blkif_x86_64_request_discard discard; |
113 | } u; | 120 | } u; |
114 | }; | 121 | } __attribute__((__packed__)); |
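
The _pad1/_pad2 fields and their offsetof() comments deserve the arithmetic: once these structs are packed, the compiler no longer aligns the 64-bit id, so explicit pad bytes must hold it at byte 8 of the request, where existing guests expect it. A hypothetical reconstruction (struct names are mine, not the kernel's) that checks the layout at compile time:

    #include <assert.h>
    #include <stddef.h>
    #include <stdint.h>

    typedef uint16_t blkif_vdev_t;

    struct rw64 {                  /* models blkif_x86_64_request_rw */
        uint8_t      nr_segments;  /* byte 0 of the union payload */
        blkif_vdev_t handle;       /* bytes 1-2 */
        uint32_t     _pad1;        /* bytes 3-6, so id starts at byte 7 */
        uint64_t     id;
    } __attribute__((__packed__));

    struct req64 {                 /* models blkif_x86_64_request */
        uint8_t operation;         /* byte 0 of the request */
        union {
            struct rw64 rw;
        } u;
    } __attribute__((__packed__));

    /* operation (1) + nr_segments (1) + handle (2) + _pad1 (4) == 8 */
    static_assert(offsetof(struct req64, u.rw.id) == 8,
                  "id must sit at wire offset 8");

    int main(void) { return 0; }
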
122 | |||
115 | struct blkif_x86_64_response { | 123 | struct blkif_x86_64_response { |
116 | uint64_t __attribute__((__aligned__(8))) id; | 124 | uint64_t __attribute__((__aligned__(8))) id; |
117 | uint8_t operation; /* copied from request */ | 125 | uint8_t operation; /* copied from request */ |
118 | int16_t status; /* BLKIF_RSP_??? */ | 126 | int16_t status; /* BLKIF_RSP_??? */ |
119 | }; | 127 | }; |
120 | 128 | ||
121 | DEFINE_RING_TYPES(blkif_common, struct blkif_common_request, | 129 | DEFINE_RING_TYPES(blkif_common, struct blkif_common_request, |
122 | struct blkif_common_response); | 130 | struct blkif_common_response); |
123 | DEFINE_RING_TYPES(blkif_x86_32, struct blkif_x86_32_request, | 131 | DEFINE_RING_TYPES(blkif_x86_32, struct blkif_x86_32_request, |
124 | struct blkif_x86_32_response); | 132 | struct blkif_x86_32_response); |
125 | DEFINE_RING_TYPES(blkif_x86_64, struct blkif_x86_64_request, | 133 | DEFINE_RING_TYPES(blkif_x86_64, struct blkif_x86_64_request, |
126 | struct blkif_x86_64_response); | 134 | struct blkif_x86_64_response); |
127 | 135 | ||
128 | union blkif_back_rings { | 136 | union blkif_back_rings { |
129 | struct blkif_back_ring native; | 137 | struct blkif_back_ring native; |
130 | struct blkif_common_back_ring common; | 138 | struct blkif_common_back_ring common; |
131 | struct blkif_x86_32_back_ring x86_32; | 139 | struct blkif_x86_32_back_ring x86_32; |
132 | struct blkif_x86_64_back_ring x86_64; | 140 | struct blkif_x86_64_back_ring x86_64; |
133 | }; | 141 | }; |
134 | 142 | ||
135 | enum blkif_protocol { | 143 | enum blkif_protocol { |
136 | BLKIF_PROTOCOL_NATIVE = 1, | 144 | BLKIF_PROTOCOL_NATIVE = 1, |
137 | BLKIF_PROTOCOL_X86_32 = 2, | 145 | BLKIF_PROTOCOL_X86_32 = 2, |
138 | BLKIF_PROTOCOL_X86_64 = 3, | 146 | BLKIF_PROTOCOL_X86_64 = 3, |
139 | }; | 147 | }; |
140 | 148 | ||
141 | enum blkif_backend_type { | 149 | enum blkif_backend_type { |
142 | BLKIF_BACKEND_PHY = 1, | 150 | BLKIF_BACKEND_PHY = 1, |
143 | BLKIF_BACKEND_FILE = 2, | 151 | BLKIF_BACKEND_FILE = 2, |
144 | }; | 152 | }; |
145 | 153 | ||
146 | struct xen_vbd { | 154 | struct xen_vbd { |
147 | /* What the domain refers to this vbd as. */ | 155 | /* What the domain refers to this vbd as. */ |
148 | blkif_vdev_t handle; | 156 | blkif_vdev_t handle; |
149 | /* Non-zero -> read-only */ | 157 | /* Non-zero -> read-only */ |
150 | unsigned char readonly; | 158 | unsigned char readonly; |
151 | /* VDISK_xxx */ | 159 | /* VDISK_xxx */ |
152 | unsigned char type; | 160 | unsigned char type; |
153 | /* phys device that this vbd maps to. */ | 161 | /* phys device that this vbd maps to. */ |
154 | u32 pdevice; | 162 | u32 pdevice; |
155 | struct block_device *bdev; | 163 | struct block_device *bdev; |
156 | /* Cached size parameter. */ | 164 | /* Cached size parameter. */ |
157 | sector_t size; | 165 | sector_t size; |
158 | bool flush_support; | 166 | bool flush_support; |
167 | bool discard_secure; | ||
159 | }; | 168 | }; |
160 | 169 | ||
161 | struct backend_info; | 170 | struct backend_info; |
162 | 171 | ||
163 | struct xen_blkif { | 172 | struct xen_blkif { |
164 | /* Unique identifier for this interface. */ | 173 | /* Unique identifier for this interface. */ |
165 | domid_t domid; | 174 | domid_t domid; |
166 | unsigned int handle; | 175 | unsigned int handle; |
167 | /* Physical parameters of the comms window. */ | 176 | /* Physical parameters of the comms window. */ |
168 | unsigned int irq; | 177 | unsigned int irq; |
169 | /* Comms information. */ | 178 | /* Comms information. */ |
170 | enum blkif_protocol blk_protocol; | 179 | enum blkif_protocol blk_protocol; |
171 | enum blkif_backend_type blk_backend_type; | 180 | enum blkif_backend_type blk_backend_type; |
172 | union blkif_back_rings blk_rings; | 181 | union blkif_back_rings blk_rings; |
173 | void *blk_ring; | 182 | void *blk_ring; |
174 | /* The VBD attached to this interface. */ | 183 | /* The VBD attached to this interface. */ |
175 | struct xen_vbd vbd; | 184 | struct xen_vbd vbd; |
176 | /* Back pointer to the backend_info. */ | 185 | /* Back pointer to the backend_info. */ |
177 | struct backend_info *be; | 186 | struct backend_info *be; |
178 | /* Private fields. */ | 187 | /* Private fields. */ |
179 | spinlock_t blk_ring_lock; | 188 | spinlock_t blk_ring_lock; |
180 | atomic_t refcnt; | 189 | atomic_t refcnt; |
181 | 190 | ||
182 | wait_queue_head_t wq; | 191 | wait_queue_head_t wq; |
183 | /* for barrier (drain) requests */ | 192 | /* for barrier (drain) requests */ |
184 | struct completion drain_complete; | 193 | struct completion drain_complete; |
185 | atomic_t drain; | 194 | atomic_t drain; |
186 | /* One thread per one blkif. */ | 195 | /* One thread per one blkif. */ |
187 | struct task_struct *xenblkd; | 196 | struct task_struct *xenblkd; |
188 | unsigned int waiting_reqs; | 197 | unsigned int waiting_reqs; |
189 | 198 | ||
190 | /* statistics */ | 199 | /* statistics */ |
191 | unsigned long st_print; | 200 | unsigned long st_print; |
192 | int st_rd_req; | 201 | int st_rd_req; |
193 | int st_wr_req; | 202 | int st_wr_req; |
194 | int st_oo_req; | 203 | int st_oo_req; |
195 | int st_f_req; | 204 | int st_f_req; |
196 | int st_ds_req; | 205 | int st_ds_req; |
197 | int st_rd_sect; | 206 | int st_rd_sect; |
198 | int st_wr_sect; | 207 | int st_wr_sect; |
199 | 208 | ||
200 | wait_queue_head_t waiting_to_free; | 209 | wait_queue_head_t waiting_to_free; |
201 | }; | 210 | }; |
202 | 211 | ||
203 | 212 | ||
204 | #define vbd_sz(_v) ((_v)->bdev->bd_part ? \ | 213 | #define vbd_sz(_v) ((_v)->bdev->bd_part ? \ |
205 | (_v)->bdev->bd_part->nr_sects : \ | 214 | (_v)->bdev->bd_part->nr_sects : \ |
206 | get_capacity((_v)->bdev->bd_disk)) | 215 | get_capacity((_v)->bdev->bd_disk)) |
207 | 216 | ||
208 | #define xen_blkif_get(_b) (atomic_inc(&(_b)->refcnt)) | 217 | #define xen_blkif_get(_b) (atomic_inc(&(_b)->refcnt)) |
209 | #define xen_blkif_put(_b) \ | 218 | #define xen_blkif_put(_b) \ |
210 | do { \ | 219 | do { \ |
211 | if (atomic_dec_and_test(&(_b)->refcnt)) \ | 220 | if (atomic_dec_and_test(&(_b)->refcnt)) \ |
212 | wake_up(&(_b)->waiting_to_free);\ | 221 | wake_up(&(_b)->waiting_to_free);\ |
213 | } while (0) | 222 | } while (0) |
214 | 223 | ||
215 | struct phys_req { | 224 | struct phys_req { |
216 | unsigned short dev; | 225 | unsigned short dev; |
217 | blkif_sector_t nr_sects; | 226 | blkif_sector_t nr_sects; |
218 | struct block_device *bdev; | 227 | struct block_device *bdev; |
219 | blkif_sector_t sector_number; | 228 | blkif_sector_t sector_number; |
220 | }; | 229 | }; |
221 | int xen_blkif_interface_init(void); | 230 | int xen_blkif_interface_init(void); |
222 | 231 | ||
223 | int xen_blkif_xenbus_init(void); | 232 | int xen_blkif_xenbus_init(void); |
224 | 233 | ||
225 | irqreturn_t xen_blkif_be_int(int irq, void *dev_id); | 234 | irqreturn_t xen_blkif_be_int(int irq, void *dev_id); |
226 | int xen_blkif_schedule(void *arg); | 235 | int xen_blkif_schedule(void *arg); |
227 | 236 | ||
228 | int xen_blkbk_flush_diskcache(struct xenbus_transaction xbt, | 237 | int xen_blkbk_flush_diskcache(struct xenbus_transaction xbt, |
229 | struct backend_info *be, int state); | 238 | struct backend_info *be, int state); |
230 | 239 | ||
231 | int xen_blkbk_barrier(struct xenbus_transaction xbt, | 240 | int xen_blkbk_barrier(struct xenbus_transaction xbt, |
232 | struct backend_info *be, int state); | 241 | struct backend_info *be, int state); |
233 | struct xenbus_device *xen_blkbk_xenbus(struct backend_info *be); | 242 | struct xenbus_device *xen_blkbk_xenbus(struct backend_info *be); |
234 | 243 | ||
235 | static inline void blkif_get_x86_32_req(struct blkif_request *dst, | 244 | static inline void blkif_get_x86_32_req(struct blkif_request *dst, |
236 | struct blkif_x86_32_request *src) | 245 | struct blkif_x86_32_request *src) |
237 | { | 246 | { |
238 | int i, n = BLKIF_MAX_SEGMENTS_PER_REQUEST; | 247 | int i, n = BLKIF_MAX_SEGMENTS_PER_REQUEST; |
239 | dst->operation = src->operation; | 248 | dst->operation = src->operation; |
240 | dst->nr_segments = src->nr_segments; | ||
241 | dst->handle = src->handle; | ||
242 | dst->id = src->id; | ||
243 | switch (src->operation) { | 249 | switch (src->operation) { |
244 | case BLKIF_OP_READ: | 250 | case BLKIF_OP_READ: |
245 | case BLKIF_OP_WRITE: | 251 | case BLKIF_OP_WRITE: |
246 | case BLKIF_OP_WRITE_BARRIER: | 252 | case BLKIF_OP_WRITE_BARRIER: |
247 | case BLKIF_OP_FLUSH_DISKCACHE: | 253 | case BLKIF_OP_FLUSH_DISKCACHE: |
254 | dst->u.rw.nr_segments = src->u.rw.nr_segments; | ||
255 | dst->u.rw.handle = src->u.rw.handle; | ||
256 | dst->u.rw.id = src->u.rw.id; | ||
248 | dst->u.rw.sector_number = src->u.rw.sector_number; | 257 | dst->u.rw.sector_number = src->u.rw.sector_number; |
249 | barrier(); | 258 | barrier(); |
250 | if (n > dst->nr_segments) | 259 | if (n > dst->u.rw.nr_segments) |
251 | n = dst->nr_segments; | 260 | n = dst->u.rw.nr_segments; |
252 | for (i = 0; i < n; i++) | 261 | for (i = 0; i < n; i++) |
253 | dst->u.rw.seg[i] = src->u.rw.seg[i]; | 262 | dst->u.rw.seg[i] = src->u.rw.seg[i]; |
254 | break; | 263 | break; |
255 | case BLKIF_OP_DISCARD: | 264 | case BLKIF_OP_DISCARD: |
265 | dst->u.discard.flag = src->u.discard.flag; | ||
256 | dst->u.discard.sector_number = src->u.discard.sector_number; | 266 | dst->u.discard.sector_number = src->u.discard.sector_number; |
257 | dst->u.discard.nr_sectors = src->u.discard.nr_sectors; | 267 | dst->u.discard.nr_sectors = src->u.discard.nr_sectors; |
258 | break; | 268 | break; |
259 | default: | 269 | default: |
260 | break; | 270 | break; |
261 | } | 271 | } |
262 | } | 272 | } |
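
blkif_get_x86_32_req() encodes a defensive pattern that is easy to miss: the guest owns the shared ring page and may rewrite it at any time, so the backend copies fields out, places a compiler barrier, and then validates and indexes using only its private copy. A userspace sketch of that copy-then-clamp order (all names hypothetical):

    #include <stdio.h>

    #define MAX_SEGS 11

    struct seg { unsigned int first, last; };
    struct req { unsigned char nseg; struct seg seg[MAX_SEGS]; };

    /* Copy the count, force the compiler not to re-read shared state,
     * then clamp and loop using only the private value -- the same
     * order as the barrier() in blkif_get_x86_32_req(). */
    static void get_req(struct req *dst, const volatile struct req *shared)
    {
        unsigned int n;

        dst->nseg = shared->nseg;
        __asm__ __volatile__("" ::: "memory");   /* barrier() */
        n = dst->nseg;                           /* private copy only */
        if (n > MAX_SEGS)
            n = MAX_SEGS;
        for (unsigned int i = 0; i < n; i++) {
            dst->seg[i].first = shared->seg[i].first;
            dst->seg[i].last  = shared->seg[i].last;
        }
    }

    int main(void)
    {
        struct req shared = { .nseg = 200 };     /* hostile count */
        struct req priv;

        get_req(&priv, &shared);
        printf("raw nseg=%u, loop clamped to %d\n",
               (unsigned int)priv.nseg, MAX_SEGS);
        return 0;
    }
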
263 | 273 | ||
264 | static inline void blkif_get_x86_64_req(struct blkif_request *dst, | 274 | static inline void blkif_get_x86_64_req(struct blkif_request *dst, |
265 | struct blkif_x86_64_request *src) | 275 | struct blkif_x86_64_request *src) |
266 | { | 276 | { |
267 | int i, n = BLKIF_MAX_SEGMENTS_PER_REQUEST; | 277 | int i, n = BLKIF_MAX_SEGMENTS_PER_REQUEST; |
268 | dst->operation = src->operation; | 278 | dst->operation = src->operation; |
269 | dst->nr_segments = src->nr_segments; | ||
270 | dst->handle = src->handle; | ||
271 | dst->id = src->id; | ||
272 | switch (src->operation) { | 279 | switch (src->operation) { |
273 | case BLKIF_OP_READ: | 280 | case BLKIF_OP_READ: |
274 | case BLKIF_OP_WRITE: | 281 | case BLKIF_OP_WRITE: |
275 | case BLKIF_OP_WRITE_BARRIER: | 282 | case BLKIF_OP_WRITE_BARRIER: |
276 | case BLKIF_OP_FLUSH_DISKCACHE: | 283 | case BLKIF_OP_FLUSH_DISKCACHE: |
284 | dst->u.rw.nr_segments = src->u.rw.nr_segments; | ||
285 | dst->u.rw.handle = src->u.rw.handle; | ||
286 | dst->u.rw.id = src->u.rw.id; | ||
277 | dst->u.rw.sector_number = src->u.rw.sector_number; | 287 | dst->u.rw.sector_number = src->u.rw.sector_number; |
278 | barrier(); | 288 | barrier(); |
279 | if (n > dst->nr_segments) | 289 | if (n > dst->u.rw.nr_segments) |
drivers/block/xen-blkback/xenbus.c
1 | /* Xenbus code for blkif backend | 1 | /* Xenbus code for blkif backend |
2 | Copyright (C) 2005 Rusty Russell <rusty@rustcorp.com.au> | 2 | Copyright (C) 2005 Rusty Russell <rusty@rustcorp.com.au> |
3 | Copyright (C) 2005 XenSource Ltd | 3 | Copyright (C) 2005 XenSource Ltd |
4 | 4 | ||
5 | This program is free software; you can redistribute it and/or modify | 5 | This program is free software; you can redistribute it and/or modify |
6 | it under the terms of the GNU General Public License as published by | 6 | it under the terms of the GNU General Public License as published by |
7 | the Free Software Foundation; either version 2 of the License, or | 7 | the Free Software Foundation; either version 2 of the License, or |
8 | (at your option) any later version. | 8 | (at your option) any later version. |
9 | 9 | ||
10 | This program is distributed in the hope that it will be useful, | 10 | This program is distributed in the hope that it will be useful, |
11 | but WITHOUT ANY WARRANTY; without even the implied warranty of | 11 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | 12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
13 | GNU General Public License for more details. | 13 | GNU General Public License for more details. |
14 | 14 | ||
15 | */ | 15 | */ |
16 | 16 | ||
17 | #include <stdarg.h> | 17 | #include <stdarg.h> |
18 | #include <linux/module.h> | 18 | #include <linux/module.h> |
19 | #include <linux/kthread.h> | 19 | #include <linux/kthread.h> |
20 | #include <xen/events.h> | 20 | #include <xen/events.h> |
21 | #include <xen/grant_table.h> | 21 | #include <xen/grant_table.h> |
22 | #include "common.h" | 22 | #include "common.h" |
23 | 23 | ||
24 | struct backend_info { | 24 | struct backend_info { |
25 | struct xenbus_device *dev; | 25 | struct xenbus_device *dev; |
26 | struct xen_blkif *blkif; | 26 | struct xen_blkif *blkif; |
27 | struct xenbus_watch backend_watch; | 27 | struct xenbus_watch backend_watch; |
28 | unsigned major; | 28 | unsigned major; |
29 | unsigned minor; | 29 | unsigned minor; |
30 | char *mode; | 30 | char *mode; |
31 | }; | 31 | }; |
32 | 32 | ||
33 | static struct kmem_cache *xen_blkif_cachep; | 33 | static struct kmem_cache *xen_blkif_cachep; |
34 | static void connect(struct backend_info *); | 34 | static void connect(struct backend_info *); |
35 | static int connect_ring(struct backend_info *); | 35 | static int connect_ring(struct backend_info *); |
36 | static void backend_changed(struct xenbus_watch *, const char **, | 36 | static void backend_changed(struct xenbus_watch *, const char **, |
37 | unsigned int); | 37 | unsigned int); |
38 | 38 | ||
39 | struct xenbus_device *xen_blkbk_xenbus(struct backend_info *be) | 39 | struct xenbus_device *xen_blkbk_xenbus(struct backend_info *be) |
40 | { | 40 | { |
41 | return be->dev; | 41 | return be->dev; |
42 | } | 42 | } |
43 | 43 | ||
44 | static int blkback_name(struct xen_blkif *blkif, char *buf) | 44 | static int blkback_name(struct xen_blkif *blkif, char *buf) |
45 | { | 45 | { |
46 | char *devpath, *devname; | 46 | char *devpath, *devname; |
47 | struct xenbus_device *dev = blkif->be->dev; | 47 | struct xenbus_device *dev = blkif->be->dev; |
48 | 48 | ||
49 | devpath = xenbus_read(XBT_NIL, dev->nodename, "dev", NULL); | 49 | devpath = xenbus_read(XBT_NIL, dev->nodename, "dev", NULL); |
50 | if (IS_ERR(devpath)) | 50 | if (IS_ERR(devpath)) |
51 | return PTR_ERR(devpath); | 51 | return PTR_ERR(devpath); |
52 | 52 | ||
53 | devname = strstr(devpath, "/dev/"); | 53 | devname = strstr(devpath, "/dev/"); |
54 | if (devname != NULL) | 54 | if (devname != NULL) |
55 | devname += strlen("/dev/"); | 55 | devname += strlen("/dev/"); |
56 | else | 56 | else |
57 | devname = devpath; | 57 | devname = devpath; |
58 | 58 | ||
59 | snprintf(buf, TASK_COMM_LEN, "blkback.%d.%s", blkif->domid, devname); | 59 | snprintf(buf, TASK_COMM_LEN, "blkback.%d.%s", blkif->domid, devname); |
60 | kfree(devpath); | 60 | kfree(devpath); |
61 | 61 | ||
62 | return 0; | 62 | return 0; |
63 | } | 63 | } |
64 | 64 | ||
65 | static void xen_update_blkif_status(struct xen_blkif *blkif) | 65 | static void xen_update_blkif_status(struct xen_blkif *blkif) |
66 | { | 66 | { |
67 | int err; | 67 | int err; |
68 | char name[TASK_COMM_LEN]; | 68 | char name[TASK_COMM_LEN]; |
69 | 69 | ||
70 | /* Not ready to connect? */ | 70 | /* Not ready to connect? */ |
71 | if (!blkif->irq || !blkif->vbd.bdev) | 71 | if (!blkif->irq || !blkif->vbd.bdev) |
72 | return; | 72 | return; |
73 | 73 | ||
74 | /* Already connected? */ | 74 | /* Already connected? */ |
75 | if (blkif->be->dev->state == XenbusStateConnected) | 75 | if (blkif->be->dev->state == XenbusStateConnected) |
76 | return; | 76 | return; |
77 | 77 | ||
78 | /* Attempt to connect: exit if we fail to. */ | 78 | /* Attempt to connect: exit if we fail to. */ |
79 | connect(blkif->be); | 79 | connect(blkif->be); |
80 | if (blkif->be->dev->state != XenbusStateConnected) | 80 | if (blkif->be->dev->state != XenbusStateConnected) |
81 | return; | 81 | return; |
82 | 82 | ||
83 | err = blkback_name(blkif, name); | 83 | err = blkback_name(blkif, name); |
84 | if (err) { | 84 | if (err) { |
85 | xenbus_dev_error(blkif->be->dev, err, "get blkback dev name"); | 85 | xenbus_dev_error(blkif->be->dev, err, "get blkback dev name"); |
86 | return; | 86 | return; |
87 | } | 87 | } |
88 | 88 | ||
89 | err = filemap_write_and_wait(blkif->vbd.bdev->bd_inode->i_mapping); | 89 | err = filemap_write_and_wait(blkif->vbd.bdev->bd_inode->i_mapping); |
90 | if (err) { | 90 | if (err) { |
91 | xenbus_dev_error(blkif->be->dev, err, "block flush"); | 91 | xenbus_dev_error(blkif->be->dev, err, "block flush"); |
92 | return; | 92 | return; |
93 | } | 93 | } |
94 | invalidate_inode_pages2(blkif->vbd.bdev->bd_inode->i_mapping); | 94 | invalidate_inode_pages2(blkif->vbd.bdev->bd_inode->i_mapping); |
95 | 95 | ||
96 | blkif->xenblkd = kthread_run(xen_blkif_schedule, blkif, name); | 96 | blkif->xenblkd = kthread_run(xen_blkif_schedule, blkif, name); |
97 | if (IS_ERR(blkif->xenblkd)) { | 97 | if (IS_ERR(blkif->xenblkd)) { |
98 | err = PTR_ERR(blkif->xenblkd); | 98 | err = PTR_ERR(blkif->xenblkd); |
99 | blkif->xenblkd = NULL; | 99 | blkif->xenblkd = NULL; |
100 | xenbus_dev_error(blkif->be->dev, err, "start xenblkd"); | 100 | xenbus_dev_error(blkif->be->dev, err, "start xenblkd"); |
101 | } | 101 | } |
102 | } | 102 | } |
103 | 103 | ||
104 | static struct xen_blkif *xen_blkif_alloc(domid_t domid) | 104 | static struct xen_blkif *xen_blkif_alloc(domid_t domid) |
105 | { | 105 | { |
106 | struct xen_blkif *blkif; | 106 | struct xen_blkif *blkif; |
107 | 107 | ||
108 | blkif = kmem_cache_alloc(xen_blkif_cachep, GFP_KERNEL); | 108 | blkif = kmem_cache_alloc(xen_blkif_cachep, GFP_KERNEL); |
109 | if (!blkif) | 109 | if (!blkif) |
110 | return ERR_PTR(-ENOMEM); | 110 | return ERR_PTR(-ENOMEM); |
111 | 111 | ||
112 | memset(blkif, 0, sizeof(*blkif)); | 112 | memset(blkif, 0, sizeof(*blkif)); |
113 | blkif->domid = domid; | 113 | blkif->domid = domid; |
114 | spin_lock_init(&blkif->blk_ring_lock); | 114 | spin_lock_init(&blkif->blk_ring_lock); |
115 | atomic_set(&blkif->refcnt, 1); | 115 | atomic_set(&blkif->refcnt, 1); |
116 | init_waitqueue_head(&blkif->wq); | 116 | init_waitqueue_head(&blkif->wq); |
117 | init_completion(&blkif->drain_complete); | 117 | init_completion(&blkif->drain_complete); |
118 | atomic_set(&blkif->drain, 0); | 118 | atomic_set(&blkif->drain, 0); |
119 | blkif->st_print = jiffies; | 119 | blkif->st_print = jiffies; |
120 | init_waitqueue_head(&blkif->waiting_to_free); | 120 | init_waitqueue_head(&blkif->waiting_to_free); |
121 | 121 | ||
122 | return blkif; | 122 | return blkif; |
123 | } | 123 | } |
124 | 124 | ||
125 | static int xen_blkif_map(struct xen_blkif *blkif, unsigned long shared_page, | 125 | static int xen_blkif_map(struct xen_blkif *blkif, unsigned long shared_page, |
126 | unsigned int evtchn) | 126 | unsigned int evtchn) |
127 | { | 127 | { |
128 | int err; | 128 | int err; |
129 | 129 | ||
130 | /* Already connected through? */ | 130 | /* Already connected through? */ |
131 | if (blkif->irq) | 131 | if (blkif->irq) |
132 | return 0; | 132 | return 0; |
133 | 133 | ||
134 | err = xenbus_map_ring_valloc(blkif->be->dev, shared_page, &blkif->blk_ring); | 134 | err = xenbus_map_ring_valloc(blkif->be->dev, shared_page, &blkif->blk_ring); |
135 | if (err < 0) | 135 | if (err < 0) |
136 | return err; | 136 | return err; |
137 | 137 | ||
138 | switch (blkif->blk_protocol) { | 138 | switch (blkif->blk_protocol) { |
139 | case BLKIF_PROTOCOL_NATIVE: | 139 | case BLKIF_PROTOCOL_NATIVE: |
140 | { | 140 | { |
141 | struct blkif_sring *sring; | 141 | struct blkif_sring *sring; |
142 | sring = (struct blkif_sring *)blkif->blk_ring; | 142 | sring = (struct blkif_sring *)blkif->blk_ring; |
143 | BACK_RING_INIT(&blkif->blk_rings.native, sring, PAGE_SIZE); | 143 | BACK_RING_INIT(&blkif->blk_rings.native, sring, PAGE_SIZE); |
144 | break; | 144 | break; |
145 | } | 145 | } |
146 | case BLKIF_PROTOCOL_X86_32: | 146 | case BLKIF_PROTOCOL_X86_32: |
147 | { | 147 | { |
148 | struct blkif_x86_32_sring *sring_x86_32; | 148 | struct blkif_x86_32_sring *sring_x86_32; |
149 | sring_x86_32 = (struct blkif_x86_32_sring *)blkif->blk_ring; | 149 | sring_x86_32 = (struct blkif_x86_32_sring *)blkif->blk_ring; |
150 | BACK_RING_INIT(&blkif->blk_rings.x86_32, sring_x86_32, PAGE_SIZE); | 150 | BACK_RING_INIT(&blkif->blk_rings.x86_32, sring_x86_32, PAGE_SIZE); |
151 | break; | 151 | break; |
152 | } | 152 | } |
153 | case BLKIF_PROTOCOL_X86_64: | 153 | case BLKIF_PROTOCOL_X86_64: |
154 | { | 154 | { |
155 | struct blkif_x86_64_sring *sring_x86_64; | 155 | struct blkif_x86_64_sring *sring_x86_64; |
156 | sring_x86_64 = (struct blkif_x86_64_sring *)blkif->blk_ring; | 156 | sring_x86_64 = (struct blkif_x86_64_sring *)blkif->blk_ring; |
157 | BACK_RING_INIT(&blkif->blk_rings.x86_64, sring_x86_64, PAGE_SIZE); | 157 | BACK_RING_INIT(&blkif->blk_rings.x86_64, sring_x86_64, PAGE_SIZE); |
158 | break; | 158 | break; |
159 | } | 159 | } |
160 | default: | 160 | default: |
161 | BUG(); | 161 | BUG(); |
162 | } | 162 | } |
163 | 163 | ||
164 | err = bind_interdomain_evtchn_to_irqhandler(blkif->domid, evtchn, | 164 | err = bind_interdomain_evtchn_to_irqhandler(blkif->domid, evtchn, |
165 | xen_blkif_be_int, 0, | 165 | xen_blkif_be_int, 0, |
166 | "blkif-backend", blkif); | 166 | "blkif-backend", blkif); |
167 | if (err < 0) { | 167 | if (err < 0) { |
168 | xenbus_unmap_ring_vfree(blkif->be->dev, blkif->blk_ring); | 168 | xenbus_unmap_ring_vfree(blkif->be->dev, blkif->blk_ring); |
169 | blkif->blk_rings.common.sring = NULL; | 169 | blkif->blk_rings.common.sring = NULL; |
170 | return err; | 170 | return err; |
171 | } | 171 | } |
172 | blkif->irq = err; | 172 | blkif->irq = err; |
173 | 173 | ||
174 | return 0; | 174 | return 0; |
175 | } | 175 | } |
176 | 176 | ||
177 | static void xen_blkif_disconnect(struct xen_blkif *blkif) | 177 | static void xen_blkif_disconnect(struct xen_blkif *blkif) |
178 | { | 178 | { |
179 | if (blkif->xenblkd) { | 179 | if (blkif->xenblkd) { |
180 | kthread_stop(blkif->xenblkd); | 180 | kthread_stop(blkif->xenblkd); |
181 | blkif->xenblkd = NULL; | 181 | blkif->xenblkd = NULL; |
182 | } | 182 | } |
183 | 183 | ||
184 | atomic_dec(&blkif->refcnt); | 184 | atomic_dec(&blkif->refcnt); |
185 | wait_event(blkif->waiting_to_free, atomic_read(&blkif->refcnt) == 0); | 185 | wait_event(blkif->waiting_to_free, atomic_read(&blkif->refcnt) == 0); |
186 | atomic_inc(&blkif->refcnt); | 186 | atomic_inc(&blkif->refcnt); |
187 | 187 | ||
188 | if (blkif->irq) { | 188 | if (blkif->irq) { |
189 | unbind_from_irqhandler(blkif->irq, blkif); | 189 | unbind_from_irqhandler(blkif->irq, blkif); |
190 | blkif->irq = 0; | 190 | blkif->irq = 0; |
191 | } | 191 | } |
192 | 192 | ||
193 | if (blkif->blk_rings.common.sring) { | 193 | if (blkif->blk_rings.common.sring) { |
194 | xenbus_unmap_ring_vfree(blkif->be->dev, blkif->blk_ring); | 194 | xenbus_unmap_ring_vfree(blkif->be->dev, blkif->blk_ring); |
195 | blkif->blk_rings.common.sring = NULL; | 195 | blkif->blk_rings.common.sring = NULL; |
196 | } | 196 | } |
197 | } | 197 | } |
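The reference-count dance above deserves a note: the function drops the reference it holds, sleeps until every in-flight request has dropped its own, then re-takes it so the blkif stays alive for xen_blkif_free(). The matching put side is a macro pair in common.h along these lines (a sketch from memory; treat the body as illustrative):

	#define xen_blkif_get(_b) (atomic_inc(&(_b)->refcnt))
	#define xen_blkif_put(_b)					\
		do {							\
			if (atomic_dec_and_test(&(_b)->refcnt))		\
				wake_up(&(_b)->waiting_to_free);	\
		} while (0)

Each pending request holds one reference, so the wait_event() completes only after the last completion has fired the wake_up().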
198 | 198 | ||
199 | void xen_blkif_free(struct xen_blkif *blkif) | 199 | void xen_blkif_free(struct xen_blkif *blkif) |
200 | { | 200 | { |
201 | if (!atomic_dec_and_test(&blkif->refcnt)) | 201 | if (!atomic_dec_and_test(&blkif->refcnt)) |
202 | BUG(); | 202 | BUG(); |
203 | kmem_cache_free(xen_blkif_cachep, blkif); | 203 | kmem_cache_free(xen_blkif_cachep, blkif); |
204 | } | 204 | } |
205 | 205 | ||
206 | int __init xen_blkif_interface_init(void) | 206 | int __init xen_blkif_interface_init(void) |
207 | { | 207 | { |
208 | xen_blkif_cachep = kmem_cache_create("blkif_cache", | 208 | xen_blkif_cachep = kmem_cache_create("blkif_cache", |
209 | sizeof(struct xen_blkif), | 209 | sizeof(struct xen_blkif), |
210 | 0, 0, NULL); | 210 | 0, 0, NULL); |
211 | if (!xen_blkif_cachep) | 211 | if (!xen_blkif_cachep) |
212 | return -ENOMEM; | 212 | return -ENOMEM; |
213 | 213 | ||
214 | return 0; | 214 | return 0; |
215 | } | 215 | } |
216 | 216 | ||
217 | /* | 217 | /* |
218 | * sysfs interface for VBD I/O requests | 218 | * sysfs interface for VBD I/O requests |
219 | */ | 219 | */ |
220 | 220 | ||
221 | #define VBD_SHOW(name, format, args...) \ | 221 | #define VBD_SHOW(name, format, args...) \ |
222 | static ssize_t show_##name(struct device *_dev, \ | 222 | static ssize_t show_##name(struct device *_dev, \ |
223 | struct device_attribute *attr, \ | 223 | struct device_attribute *attr, \ |
224 | char *buf) \ | 224 | char *buf) \ |
225 | { \ | 225 | { \ |
226 | struct xenbus_device *dev = to_xenbus_device(_dev); \ | 226 | struct xenbus_device *dev = to_xenbus_device(_dev); \ |
227 | struct backend_info *be = dev_get_drvdata(&dev->dev); \ | 227 | struct backend_info *be = dev_get_drvdata(&dev->dev); \ |
228 | \ | 228 | \ |
229 | return sprintf(buf, format, ##args); \ | 229 | return sprintf(buf, format, ##args); \ |
230 | } \ | 230 | } \ |
231 | static DEVICE_ATTR(name, S_IRUGO, show_##name, NULL) | 231 | static DEVICE_ATTR(name, S_IRUGO, show_##name, NULL) |
232 | 232 | ||
233 | VBD_SHOW(oo_req, "%d\n", be->blkif->st_oo_req); | 233 | VBD_SHOW(oo_req, "%d\n", be->blkif->st_oo_req); |
234 | VBD_SHOW(rd_req, "%d\n", be->blkif->st_rd_req); | 234 | VBD_SHOW(rd_req, "%d\n", be->blkif->st_rd_req); |
235 | VBD_SHOW(wr_req, "%d\n", be->blkif->st_wr_req); | 235 | VBD_SHOW(wr_req, "%d\n", be->blkif->st_wr_req); |
236 | VBD_SHOW(f_req, "%d\n", be->blkif->st_f_req); | 236 | VBD_SHOW(f_req, "%d\n", be->blkif->st_f_req); |
237 | VBD_SHOW(ds_req, "%d\n", be->blkif->st_ds_req); | 237 | VBD_SHOW(ds_req, "%d\n", be->blkif->st_ds_req); |
238 | VBD_SHOW(rd_sect, "%d\n", be->blkif->st_rd_sect); | 238 | VBD_SHOW(rd_sect, "%d\n", be->blkif->st_rd_sect); |
239 | VBD_SHOW(wr_sect, "%d\n", be->blkif->st_wr_sect); | 239 | VBD_SHOW(wr_sect, "%d\n", be->blkif->st_wr_sect); |
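To make the macro concrete: the first invocation above, VBD_SHOW(oo_req, "%d\n", be->blkif->st_oo_req), expands to essentially the following show routine plus its device attribute:

	static ssize_t show_oo_req(struct device *_dev,
				   struct device_attribute *attr, char *buf)
	{
		struct xenbus_device *dev = to_xenbus_device(_dev);
		struct backend_info *be = dev_get_drvdata(&dev->dev);

		return sprintf(buf, "%d\n", be->blkif->st_oo_req);
	}
	static DEVICE_ATTR(oo_req, S_IRUGO, show_oo_req, NULL);

The attribute_group below then collects these files under a "statistics" subdirectory of the backend's sysfs node.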
240 | 240 | ||
241 | static struct attribute *xen_vbdstat_attrs[] = { | 241 | static struct attribute *xen_vbdstat_attrs[] = { |
242 | &dev_attr_oo_req.attr, | 242 | &dev_attr_oo_req.attr, |
243 | &dev_attr_rd_req.attr, | 243 | &dev_attr_rd_req.attr, |
244 | &dev_attr_wr_req.attr, | 244 | &dev_attr_wr_req.attr, |
245 | &dev_attr_f_req.attr, | 245 | &dev_attr_f_req.attr, |
246 | &dev_attr_ds_req.attr, | 246 | &dev_attr_ds_req.attr, |
247 | &dev_attr_rd_sect.attr, | 247 | &dev_attr_rd_sect.attr, |
248 | &dev_attr_wr_sect.attr, | 248 | &dev_attr_wr_sect.attr, |
249 | NULL | 249 | NULL |
250 | }; | 250 | }; |
251 | 251 | ||
252 | static struct attribute_group xen_vbdstat_group = { | 252 | static struct attribute_group xen_vbdstat_group = { |
253 | .name = "statistics", | 253 | .name = "statistics", |
254 | .attrs = xen_vbdstat_attrs, | 254 | .attrs = xen_vbdstat_attrs, |
255 | }; | 255 | }; |
256 | 256 | ||
257 | VBD_SHOW(physical_device, "%x:%x\n", be->major, be->minor); | 257 | VBD_SHOW(physical_device, "%x:%x\n", be->major, be->minor); |
258 | VBD_SHOW(mode, "%s\n", be->mode); | 258 | VBD_SHOW(mode, "%s\n", be->mode); |
259 | 259 | ||
260 | int xenvbd_sysfs_addif(struct xenbus_device *dev) | 260 | int xenvbd_sysfs_addif(struct xenbus_device *dev) |
261 | { | 261 | { |
262 | int error; | 262 | int error; |
263 | 263 | ||
264 | error = device_create_file(&dev->dev, &dev_attr_physical_device); | 264 | error = device_create_file(&dev->dev, &dev_attr_physical_device); |
265 | if (error) | 265 | if (error) |
266 | goto fail1; | 266 | goto fail1; |
267 | 267 | ||
268 | error = device_create_file(&dev->dev, &dev_attr_mode); | 268 | error = device_create_file(&dev->dev, &dev_attr_mode); |
269 | if (error) | 269 | if (error) |
270 | goto fail2; | 270 | goto fail2; |
271 | 271 | ||
272 | error = sysfs_create_group(&dev->dev.kobj, &xen_vbdstat_group); | 272 | error = sysfs_create_group(&dev->dev.kobj, &xen_vbdstat_group); |
273 | if (error) | 273 | if (error) |
274 | goto fail3; | 274 | goto fail3; |
275 | 275 | ||
276 | return 0; | 276 | return 0; |
277 | 277 | ||
278 | fail3: sysfs_remove_group(&dev->dev.kobj, &xen_vbdstat_group); | 278 | fail3: sysfs_remove_group(&dev->dev.kobj, &xen_vbdstat_group); |
279 | fail2: device_remove_file(&dev->dev, &dev_attr_mode); | 279 | fail2: device_remove_file(&dev->dev, &dev_attr_mode); |
280 | fail1: device_remove_file(&dev->dev, &dev_attr_physical_device); | 280 | fail1: device_remove_file(&dev->dev, &dev_attr_physical_device); |
281 | return error; | 281 | return error; |
282 | } | 282 | } |
283 | 283 | ||
284 | void xenvbd_sysfs_delif(struct xenbus_device *dev) | 284 | void xenvbd_sysfs_delif(struct xenbus_device *dev) |
285 | { | 285 | { |
286 | sysfs_remove_group(&dev->dev.kobj, &xen_vbdstat_group); | 286 | sysfs_remove_group(&dev->dev.kobj, &xen_vbdstat_group); |
287 | device_remove_file(&dev->dev, &dev_attr_mode); | 287 | device_remove_file(&dev->dev, &dev_attr_mode); |
288 | device_remove_file(&dev->dev, &dev_attr_physical_device); | 288 | device_remove_file(&dev->dev, &dev_attr_physical_device); |
289 | } | 289 | } |
290 | 290 | ||
291 | 291 | ||
292 | static void xen_vbd_free(struct xen_vbd *vbd) | 292 | static void xen_vbd_free(struct xen_vbd *vbd) |
293 | { | 293 | { |
294 | if (vbd->bdev) | 294 | if (vbd->bdev) |
295 | blkdev_put(vbd->bdev, vbd->readonly ? FMODE_READ : FMODE_WRITE); | 295 | blkdev_put(vbd->bdev, vbd->readonly ? FMODE_READ : FMODE_WRITE); |
296 | vbd->bdev = NULL; | 296 | vbd->bdev = NULL; |
297 | } | 297 | } |
298 | 298 | ||
299 | static int xen_vbd_create(struct xen_blkif *blkif, blkif_vdev_t handle, | 299 | static int xen_vbd_create(struct xen_blkif *blkif, blkif_vdev_t handle, |
300 | unsigned major, unsigned minor, int readonly, | 300 | unsigned major, unsigned minor, int readonly, |
301 | int cdrom) | 301 | int cdrom) |
302 | { | 302 | { |
303 | struct xen_vbd *vbd; | 303 | struct xen_vbd *vbd; |
304 | struct block_device *bdev; | 304 | struct block_device *bdev; |
305 | struct request_queue *q; | 305 | struct request_queue *q; |
306 | 306 | ||
307 | vbd = &blkif->vbd; | 307 | vbd = &blkif->vbd; |
308 | vbd->handle = handle; | 308 | vbd->handle = handle; |
309 | vbd->readonly = readonly; | 309 | vbd->readonly = readonly; |
310 | vbd->type = 0; | 310 | vbd->type = 0; |
311 | 311 | ||
312 | vbd->pdevice = MKDEV(major, minor); | 312 | vbd->pdevice = MKDEV(major, minor); |
313 | 313 | ||
314 | bdev = blkdev_get_by_dev(vbd->pdevice, vbd->readonly ? | 314 | bdev = blkdev_get_by_dev(vbd->pdevice, vbd->readonly ? |
315 | FMODE_READ : FMODE_WRITE, NULL); | 315 | FMODE_READ : FMODE_WRITE, NULL); |
316 | 316 | ||
317 | if (IS_ERR(bdev)) { | 317 | if (IS_ERR(bdev)) { |
318 | DPRINTK("xen_vbd_create: device %08x could not be opened.\n", | 318 | DPRINTK("xen_vbd_create: device %08x could not be opened.\n", |
319 | vbd->pdevice); | 319 | vbd->pdevice); |
320 | return -ENOENT; | 320 | return -ENOENT; |
321 | } | 321 | } |
322 | 322 | ||
323 | vbd->bdev = bdev; | 323 | vbd->bdev = bdev; |
324 | if (vbd->bdev->bd_disk == NULL) { | 324 | if (vbd->bdev->bd_disk == NULL) { |
325 | DPRINTK("xen_vbd_create: device %08x doesn't exist.\n", | 325 | DPRINTK("xen_vbd_create: device %08x doesn't exist.\n", |
326 | vbd->pdevice); | 326 | vbd->pdevice); |
327 | xen_vbd_free(vbd); | 327 | xen_vbd_free(vbd); |
328 | return -ENOENT; | 328 | return -ENOENT; |
329 | } | 329 | } |
330 | vbd->size = vbd_sz(vbd); | 330 | vbd->size = vbd_sz(vbd); |
331 | 331 | ||
332 | if (vbd->bdev->bd_disk->flags & GENHD_FL_CD || cdrom) | 332 | if (vbd->bdev->bd_disk->flags & GENHD_FL_CD || cdrom) |
333 | vbd->type |= VDISK_CDROM; | 333 | vbd->type |= VDISK_CDROM; |
334 | if (vbd->bdev->bd_disk->flags & GENHD_FL_REMOVABLE) | 334 | if (vbd->bdev->bd_disk->flags & GENHD_FL_REMOVABLE) |
335 | vbd->type |= VDISK_REMOVABLE; | 335 | vbd->type |= VDISK_REMOVABLE; |
336 | 336 | ||
337 | q = bdev_get_queue(bdev); | 337 | q = bdev_get_queue(bdev); |
338 | if (q && q->flush_flags) | 338 | if (q && q->flush_flags) |
339 | vbd->flush_support = true; | 339 | vbd->flush_support = true; |
340 | 340 | ||
341 | if (q && blk_queue_secdiscard(q)) | ||
342 | vbd->discard_secure = true; | ||
343 | |||
341 | DPRINTK("Successful creation of handle=%04x (dom=%u)\n", | 344 | DPRINTK("Successful creation of handle=%04x (dom=%u)\n", |
342 | handle, blkif->domid); | 345 | handle, blkif->domid); |
343 | return 0; | 346 | return 0; |
344 | } | 347 | } |
345 | static int xen_blkbk_remove(struct xenbus_device *dev) | 348 | static int xen_blkbk_remove(struct xenbus_device *dev) |
346 | { | 349 | { |
347 | struct backend_info *be = dev_get_drvdata(&dev->dev); | 350 | struct backend_info *be = dev_get_drvdata(&dev->dev); |
348 | 351 | ||
349 | DPRINTK(""); | 352 | DPRINTK(""); |
350 | 353 | ||
351 | if (be->major || be->minor) | 354 | if (be->major || be->minor) |
352 | xenvbd_sysfs_delif(dev); | 355 | xenvbd_sysfs_delif(dev); |
353 | 356 | ||
354 | if (be->backend_watch.node) { | 357 | if (be->backend_watch.node) { |
355 | unregister_xenbus_watch(&be->backend_watch); | 358 | unregister_xenbus_watch(&be->backend_watch); |
356 | kfree(be->backend_watch.node); | 359 | kfree(be->backend_watch.node); |
357 | be->backend_watch.node = NULL; | 360 | be->backend_watch.node = NULL; |
358 | } | 361 | } |
359 | 362 | ||
360 | if (be->blkif) { | 363 | if (be->blkif) { |
361 | xen_blkif_disconnect(be->blkif); | 364 | xen_blkif_disconnect(be->blkif); |
362 | xen_vbd_free(&be->blkif->vbd); | 365 | xen_vbd_free(&be->blkif->vbd); |
363 | xen_blkif_free(be->blkif); | 366 | xen_blkif_free(be->blkif); |
364 | be->blkif = NULL; | 367 | be->blkif = NULL; |
365 | } | 368 | } |
366 | 369 | ||
367 | kfree(be); | 370 | kfree(be); |
368 | dev_set_drvdata(&dev->dev, NULL); | 371 | dev_set_drvdata(&dev->dev, NULL); |
369 | return 0; | 372 | return 0; |
370 | } | 373 | } |
371 | 374 | ||
372 | int xen_blkbk_flush_diskcache(struct xenbus_transaction xbt, | 375 | int xen_blkbk_flush_diskcache(struct xenbus_transaction xbt, |
373 | struct backend_info *be, int state) | 376 | struct backend_info *be, int state) |
374 | { | 377 | { |
375 | struct xenbus_device *dev = be->dev; | 378 | struct xenbus_device *dev = be->dev; |
376 | int err; | 379 | int err; |
377 | 380 | ||
378 | err = xenbus_printf(xbt, dev->nodename, "feature-flush-cache", | 381 | err = xenbus_printf(xbt, dev->nodename, "feature-flush-cache", |
379 | "%d", state); | 382 | "%d", state); |
380 | if (err) | 383 | if (err) |
381 | xenbus_dev_fatal(dev, err, "writing feature-flush-cache"); | 384 | xenbus_dev_fatal(dev, err, "writing feature-flush-cache"); |
382 | 385 | ||
383 | return err; | 386 | return err; |
384 | } | 387 | } |
385 | 388 | ||
386 | int xen_blkbk_discard(struct xenbus_transaction xbt, struct backend_info *be) | 389 | int xen_blkbk_discard(struct xenbus_transaction xbt, struct backend_info *be) |
387 | { | 390 | { |
388 | struct xenbus_device *dev = be->dev; | 391 | struct xenbus_device *dev = be->dev; |
389 | struct xen_blkif *blkif = be->blkif; | 392 | struct xen_blkif *blkif = be->blkif; |
390 | char *type; | 393 | char *type; |
391 | int err; | 394 | int err; |
392 | int state = 0; | 395 | int state = 0; |
393 | 396 | ||
394 | type = xenbus_read(XBT_NIL, dev->nodename, "type", NULL); | 397 | type = xenbus_read(XBT_NIL, dev->nodename, "type", NULL); |
395 | if (!IS_ERR(type)) { | 398 | if (!IS_ERR(type)) { |
396 | if (strncmp(type, "file", 4) == 0) { | 399 | if (strncmp(type, "file", 4) == 0) { |
397 | state = 1; | 400 | state = 1; |
398 | blkif->blk_backend_type = BLKIF_BACKEND_FILE; | 401 | blkif->blk_backend_type = BLKIF_BACKEND_FILE; |
399 | } | 402 | } |
400 | if (strncmp(type, "phy", 3) == 0) { | 403 | if (strncmp(type, "phy", 3) == 0) { |
401 | struct block_device *bdev = be->blkif->vbd.bdev; | 404 | struct block_device *bdev = be->blkif->vbd.bdev; |
402 | struct request_queue *q = bdev_get_queue(bdev); | 405 | struct request_queue *q = bdev_get_queue(bdev); |
403 | if (blk_queue_discard(q)) { | 406 | if (blk_queue_discard(q)) { |
404 | err = xenbus_printf(xbt, dev->nodename, | 407 | err = xenbus_printf(xbt, dev->nodename, |
405 | "discard-granularity", "%u", | 408 | "discard-granularity", "%u", |
406 | q->limits.discard_granularity); | 409 | q->limits.discard_granularity); |
407 | if (err) { | 410 | if (err) { |
408 | xenbus_dev_fatal(dev, err, | 411 | xenbus_dev_fatal(dev, err, |
409 | "writing discard-granularity"); | 412 | "writing discard-granularity"); |
410 | goto kfree; | 413 | goto kfree; |
411 | } | 414 | } |
412 | err = xenbus_printf(xbt, dev->nodename, | 415 | err = xenbus_printf(xbt, dev->nodename, |
413 | "discard-alignment", "%u", | 416 | "discard-alignment", "%u", |
414 | q->limits.discard_alignment); | 417 | q->limits.discard_alignment); |
415 | if (err) { | 418 | if (err) { |
416 | xenbus_dev_fatal(dev, err, | 419 | xenbus_dev_fatal(dev, err, |
417 | "writing discard-alignment"); | 420 | "writing discard-alignment"); |
418 | goto kfree; | 421 | goto kfree; |
419 | } | 422 | } |
420 | state = 1; | 423 | state = 1; |
421 | blkif->blk_backend_type = BLKIF_BACKEND_PHY; | 424 | blkif->blk_backend_type = BLKIF_BACKEND_PHY; |
425 | } | ||
426 | /* Optional. */ | ||
427 | err = xenbus_printf(xbt, dev->nodename, | ||
428 | "discard-secure", "%d", | ||
429 | blkif->vbd.discard_secure); | ||
430 | if (err) { | ||
431 | xenbus_dev_fatal(dev, err, | ||
432 | "writting discard-secure"); | ||
433 | goto kfree; | ||
422 | } | 434 | } |
423 | } | 435 | } |
424 | } else { | 436 | } else { |
425 | err = PTR_ERR(type); | 437 | err = PTR_ERR(type); |
426 | xenbus_dev_fatal(dev, err, "reading type"); | 438 | xenbus_dev_fatal(dev, err, "reading type"); |
427 | goto out; | 439 | goto out; |
428 | } | 440 | } |
429 | 441 | ||
430 | err = xenbus_printf(xbt, dev->nodename, "feature-discard", | 442 | err = xenbus_printf(xbt, dev->nodename, "feature-discard", |
431 | "%d", state); | 443 | "%d", state); |
432 | if (err) | 444 | if (err) |
433 | xenbus_dev_fatal(dev, err, "writing feature-discard"); | 445 | xenbus_dev_fatal(dev, err, "writing feature-discard"); |
434 | kfree: | 446 | kfree: |
435 | kfree(type); | 447 | kfree(type); |
436 | out: | 448 | out: |
437 | return err; | 449 | return err; |
438 | } | 450 | } |
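A frontend consumes these nodes roughly as follows (a sketch in the spirit of blkfront, not its verbatim code; the helper name here is made up for illustration). Since discard-secure is marked optional on the backend side, the reader must tolerate its absence and default to 0:

	static void blkfront_read_discard(struct blkfront_info *info)
	{
		unsigned int granularity, alignment, secure = 0;

		if (xenbus_gather(XBT_NIL, info->xbdev->otherend,
				  "discard-granularity", "%u", &granularity,
				  "discard-alignment", "%u", &alignment,
				  NULL))
			return;	/* backend did not advertise discard */
		xenbus_scanf(XBT_NIL, info->xbdev->otherend,
			     "discard-secure", "%u", &secure);
		info->feature_discard = 1;
		info->feature_secdiscard = !!secure;
		info->discard_granularity = granularity;
		info->discard_alignment = alignment;
	}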
439 | int xen_blkbk_barrier(struct xenbus_transaction xbt, | 451 | int xen_blkbk_barrier(struct xenbus_transaction xbt, |
440 | struct backend_info *be, int state) | 452 | struct backend_info *be, int state) |
441 | { | 453 | { |
442 | struct xenbus_device *dev = be->dev; | 454 | struct xenbus_device *dev = be->dev; |
443 | int err; | 455 | int err; |
444 | 456 | ||
445 | err = xenbus_printf(xbt, dev->nodename, "feature-barrier", | 457 | err = xenbus_printf(xbt, dev->nodename, "feature-barrier", |
446 | "%d", state); | 458 | "%d", state); |
447 | if (err) | 459 | if (err) |
448 | xenbus_dev_fatal(dev, err, "writing feature-barrier"); | 460 | xenbus_dev_fatal(dev, err, "writing feature-barrier"); |
449 | 461 | ||
450 | return err; | 462 | return err; |
451 | } | 463 | } |
452 | 464 | ||
453 | /* | 465 | /* |
454 | * Entry point to this code when a new device is created. Allocate the basic | 466 | * Entry point to this code when a new device is created. Allocate the basic |
455 | * structures, and watch the store waiting for the hotplug scripts to tell us | 467 | * structures, and watch the store waiting for the hotplug scripts to tell us |
456 | * the device's physical major and minor numbers. Switch to InitWait. | 468 | * the device's physical major and minor numbers. Switch to InitWait. |
457 | */ | 469 | */ |
458 | static int xen_blkbk_probe(struct xenbus_device *dev, | 470 | static int xen_blkbk_probe(struct xenbus_device *dev, |
459 | const struct xenbus_device_id *id) | 471 | const struct xenbus_device_id *id) |
460 | { | 472 | { |
461 | int err; | 473 | int err; |
462 | struct backend_info *be = kzalloc(sizeof(struct backend_info), | 474 | struct backend_info *be = kzalloc(sizeof(struct backend_info), |
463 | GFP_KERNEL); | 475 | GFP_KERNEL); |
464 | if (!be) { | 476 | if (!be) { |
465 | xenbus_dev_fatal(dev, -ENOMEM, | 477 | xenbus_dev_fatal(dev, -ENOMEM, |
466 | "allocating backend structure"); | 478 | "allocating backend structure"); |
467 | return -ENOMEM; | 479 | return -ENOMEM; |
468 | } | 480 | } |
469 | be->dev = dev; | 481 | be->dev = dev; |
470 | dev_set_drvdata(&dev->dev, be); | 482 | dev_set_drvdata(&dev->dev, be); |
471 | 483 | ||
472 | be->blkif = xen_blkif_alloc(dev->otherend_id); | 484 | be->blkif = xen_blkif_alloc(dev->otherend_id); |
473 | if (IS_ERR(be->blkif)) { | 485 | if (IS_ERR(be->blkif)) { |
474 | err = PTR_ERR(be->blkif); | 486 | err = PTR_ERR(be->blkif); |
475 | be->blkif = NULL; | 487 | be->blkif = NULL; |
476 | xenbus_dev_fatal(dev, err, "creating block interface"); | 488 | xenbus_dev_fatal(dev, err, "creating block interface"); |
477 | goto fail; | 489 | goto fail; |
478 | } | 490 | } |
479 | 491 | ||
480 | /* setup back pointer */ | 492 | /* setup back pointer */ |
481 | be->blkif->be = be; | 493 | be->blkif->be = be; |
482 | 494 | ||
483 | err = xenbus_watch_pathfmt(dev, &be->backend_watch, backend_changed, | 495 | err = xenbus_watch_pathfmt(dev, &be->backend_watch, backend_changed, |
484 | "%s/%s", dev->nodename, "physical-device"); | 496 | "%s/%s", dev->nodename, "physical-device"); |
485 | if (err) | 497 | if (err) |
486 | goto fail; | 498 | goto fail; |
487 | 499 | ||
488 | err = xenbus_switch_state(dev, XenbusStateInitWait); | 500 | err = xenbus_switch_state(dev, XenbusStateInitWait); |
489 | if (err) | 501 | if (err) |
490 | goto fail; | 502 | goto fail; |
491 | 503 | ||
492 | return 0; | 504 | return 0; |
493 | 505 | ||
494 | fail: | 506 | fail: |
495 | DPRINTK("failed"); | 507 | DPRINTK("failed"); |
496 | xen_blkbk_remove(dev); | 508 | xen_blkbk_remove(dev); |
497 | return err; | 509 | return err; |
498 | } | 510 | } |
499 | 511 | ||
500 | 512 | ||
501 | /* | 513 | /* |
502 | * Callback received when the hotplug scripts have placed the physical-device | 514 | * Callback received when the hotplug scripts have placed the physical-device |
503 | * node. Read it and the mode node, and create a vbd. If the frontend is | 515 | * node. Read it and the mode node, and create a vbd. If the frontend is |
504 | * ready, connect. | 516 | * ready, connect. |
505 | */ | 517 | */ |
506 | static void backend_changed(struct xenbus_watch *watch, | 518 | static void backend_changed(struct xenbus_watch *watch, |
507 | const char **vec, unsigned int len) | 519 | const char **vec, unsigned int len) |
508 | { | 520 | { |
509 | int err; | 521 | int err; |
510 | unsigned major; | 522 | unsigned major; |
511 | unsigned minor; | 523 | unsigned minor; |
512 | struct backend_info *be | 524 | struct backend_info *be |
513 | = container_of(watch, struct backend_info, backend_watch); | 525 | = container_of(watch, struct backend_info, backend_watch); |
514 | struct xenbus_device *dev = be->dev; | 526 | struct xenbus_device *dev = be->dev; |
515 | int cdrom = 0; | 527 | int cdrom = 0; |
516 | char *device_type; | 528 | char *device_type; |
517 | 529 | ||
518 | DPRINTK(""); | 530 | DPRINTK(""); |
519 | 531 | ||
520 | err = xenbus_scanf(XBT_NIL, dev->nodename, "physical-device", "%x:%x", | 532 | err = xenbus_scanf(XBT_NIL, dev->nodename, "physical-device", "%x:%x", |
521 | &major, &minor); | 533 | &major, &minor); |
522 | if (XENBUS_EXIST_ERR(err)) { | 534 | if (XENBUS_EXIST_ERR(err)) { |
523 | /* | 535 | /* |
524 | * Since this watch will fire once immediately after it is | 536 | * Since this watch will fire once immediately after it is |
525 | * registered, we expect this. Ignore it, and wait for the | 537 | * registered, we expect this. Ignore it, and wait for the |
526 | * hotplug scripts. | 538 | * hotplug scripts. |
527 | */ | 539 | */ |
528 | return; | 540 | return; |
529 | } | 541 | } |
530 | if (err != 2) { | 542 | if (err != 2) { |
531 | xenbus_dev_fatal(dev, err, "reading physical-device"); | 543 | xenbus_dev_fatal(dev, err, "reading physical-device"); |
532 | return; | 544 | return; |
533 | } | 545 | } |
534 | 546 | ||
535 | if ((be->major || be->minor) && | 547 | if ((be->major || be->minor) && |
536 | ((be->major != major) || (be->minor != minor))) { | 548 | ((be->major != major) || (be->minor != minor))) { |
537 | pr_warn(DRV_PFX "changing physical device (from %x:%x to %x:%x) not supported.\n", | 549 | pr_warn(DRV_PFX "changing physical device (from %x:%x to %x:%x) not supported.\n", |
538 | be->major, be->minor, major, minor); | 550 | be->major, be->minor, major, minor); |
539 | return; | 551 | return; |
540 | } | 552 | } |
541 | 553 | ||
542 | be->mode = xenbus_read(XBT_NIL, dev->nodename, "mode", NULL); | 554 | be->mode = xenbus_read(XBT_NIL, dev->nodename, "mode", NULL); |
543 | if (IS_ERR(be->mode)) { | 555 | if (IS_ERR(be->mode)) { |
544 | err = PTR_ERR(be->mode); | 556 | err = PTR_ERR(be->mode); |
545 | be->mode = NULL; | 557 | be->mode = NULL; |
546 | xenbus_dev_fatal(dev, err, "reading mode"); | 558 | xenbus_dev_fatal(dev, err, "reading mode"); |
547 | return; | 559 | return; |
548 | } | 560 | } |
549 | 561 | ||
550 | device_type = xenbus_read(XBT_NIL, dev->otherend, "device-type", NULL); | 562 | device_type = xenbus_read(XBT_NIL, dev->otherend, "device-type", NULL); |
551 | if (!IS_ERR(device_type)) { | 563 | if (!IS_ERR(device_type)) { |
552 | cdrom = strcmp(device_type, "cdrom") == 0; | 564 | cdrom = strcmp(device_type, "cdrom") == 0; |
553 | kfree(device_type); | 565 | kfree(device_type); |
554 | } | 566 | } |
555 | 567 | ||
556 | if (be->major == 0 && be->minor == 0) { | 568 | if (be->major == 0 && be->minor == 0) { |
557 | /* Front end dir is a number, which is used as the handle. */ | 569 | /* Front end dir is a number, which is used as the handle. */ |
558 | 570 | ||
559 | char *p = strrchr(dev->otherend, '/') + 1; | 571 | char *p = strrchr(dev->otherend, '/') + 1; |
560 | long handle; | 572 | long handle; |
561 | err = strict_strtoul(p, 0, &handle); | 573 | err = strict_strtoul(p, 0, &handle); |
562 | if (err) | 574 | if (err) |
563 | return; | 575 | return; |
564 | 576 | ||
565 | be->major = major; | 577 | be->major = major; |
566 | be->minor = minor; | 578 | be->minor = minor; |
567 | 579 | ||
568 | err = xen_vbd_create(be->blkif, handle, major, minor, | 580 | err = xen_vbd_create(be->blkif, handle, major, minor, |
569 | (NULL == strchr(be->mode, 'w')), cdrom); | 581 | (NULL == strchr(be->mode, 'w')), cdrom); |
570 | if (err) { | 582 | if (err) { |
571 | be->major = 0; | 583 | be->major = 0; |
572 | be->minor = 0; | 584 | be->minor = 0; |
573 | xenbus_dev_fatal(dev, err, "creating vbd structure"); | 585 | xenbus_dev_fatal(dev, err, "creating vbd structure"); |
574 | return; | 586 | return; |
575 | } | 587 | } |
576 | 588 | ||
577 | err = xenvbd_sysfs_addif(dev); | 589 | err = xenvbd_sysfs_addif(dev); |
578 | if (err) { | 590 | if (err) { |
579 | xen_vbd_free(&be->blkif->vbd); | 591 | xen_vbd_free(&be->blkif->vbd); |
580 | be->major = 0; | 592 | be->major = 0; |
581 | be->minor = 0; | 593 | be->minor = 0; |
582 | xenbus_dev_fatal(dev, err, "creating sysfs entries"); | 594 | xenbus_dev_fatal(dev, err, "creating sysfs entries"); |
583 | return; | 595 | return; |
584 | } | 596 | } |
585 | 597 | ||
586 | /* We're potentially connected now */ | 598 | /* We're potentially connected now */ |
587 | xen_update_blkif_status(be->blkif); | 599 | xen_update_blkif_status(be->blkif); |
588 | } | 600 | } |
589 | } | 601 | } |
590 | 602 | ||
591 | 603 | ||
592 | /* | 604 | /* |
593 | * Callback received when the frontend's state changes. | 605 | * Callback received when the frontend's state changes. |
594 | */ | 606 | */ |
595 | static void frontend_changed(struct xenbus_device *dev, | 607 | static void frontend_changed(struct xenbus_device *dev, |
596 | enum xenbus_state frontend_state) | 608 | enum xenbus_state frontend_state) |
597 | { | 609 | { |
598 | struct backend_info *be = dev_get_drvdata(&dev->dev); | 610 | struct backend_info *be = dev_get_drvdata(&dev->dev); |
599 | int err; | 611 | int err; |
600 | 612 | ||
601 | DPRINTK("%s", xenbus_strstate(frontend_state)); | 613 | DPRINTK("%s", xenbus_strstate(frontend_state)); |
602 | 614 | ||
603 | switch (frontend_state) { | 615 | switch (frontend_state) { |
604 | case XenbusStateInitialising: | 616 | case XenbusStateInitialising: |
605 | if (dev->state == XenbusStateClosed) { | 617 | if (dev->state == XenbusStateClosed) { |
606 | pr_info(DRV_PFX "%s: prepare for reconnect\n", | 618 | pr_info(DRV_PFX "%s: prepare for reconnect\n", |
607 | dev->nodename); | 619 | dev->nodename); |
608 | xenbus_switch_state(dev, XenbusStateInitWait); | 620 | xenbus_switch_state(dev, XenbusStateInitWait); |
609 | } | 621 | } |
610 | break; | 622 | break; |
611 | 623 | ||
612 | case XenbusStateInitialised: | 624 | case XenbusStateInitialised: |
613 | case XenbusStateConnected: | 625 | case XenbusStateConnected: |
614 | /* | 626 | /* |
615 | * Ensure we connect even when two watches fire in | 627 | * Ensure we connect even when two watches fire in |
616 | * close succession and we miss the intermediate value | 628 | * close succession and we miss the intermediate value |
617 | * of frontend_state. | 629 | * of frontend_state. |
618 | */ | 630 | */ |
619 | if (dev->state == XenbusStateConnected) | 631 | if (dev->state == XenbusStateConnected) |
620 | break; | 632 | break; |
621 | 633 | ||
622 | /* | 634 | /* |
623 | * Enforce precondition before potential leak point. | 635 | * Enforce precondition before potential leak point. |
624 | * xen_blkif_disconnect() is idempotent. | 636 | * xen_blkif_disconnect() is idempotent. |
625 | */ | 637 | */ |
626 | xen_blkif_disconnect(be->blkif); | 638 | xen_blkif_disconnect(be->blkif); |
627 | 639 | ||
628 | err = connect_ring(be); | 640 | err = connect_ring(be); |
629 | if (err) | 641 | if (err) |
630 | break; | 642 | break; |
631 | xen_update_blkif_status(be->blkif); | 643 | xen_update_blkif_status(be->blkif); |
632 | break; | 644 | break; |
633 | 645 | ||
634 | case XenbusStateClosing: | 646 | case XenbusStateClosing: |
635 | xenbus_switch_state(dev, XenbusStateClosing); | 647 | xenbus_switch_state(dev, XenbusStateClosing); |
636 | break; | 648 | break; |
637 | 649 | ||
638 | case XenbusStateClosed: | 650 | case XenbusStateClosed: |
639 | xen_blkif_disconnect(be->blkif); | 651 | xen_blkif_disconnect(be->blkif); |
640 | xenbus_switch_state(dev, XenbusStateClosed); | 652 | xenbus_switch_state(dev, XenbusStateClosed); |
641 | if (xenbus_dev_is_online(dev)) | 653 | if (xenbus_dev_is_online(dev)) |
642 | break; | 654 | break; |
643 | /* fall through if not online */ | 655 | /* fall through if not online */ |
644 | case XenbusStateUnknown: | 656 | case XenbusStateUnknown: |
645 | /* implies xen_blkif_disconnect() via xen_blkbk_remove() */ | 657 | /* implies xen_blkif_disconnect() via xen_blkbk_remove() */ |
646 | device_unregister(&dev->dev); | 658 | device_unregister(&dev->dev); |
647 | break; | 659 | break; |
648 | 660 | ||
649 | default: | 661 | default: |
650 | xenbus_dev_fatal(dev, -EINVAL, "saw state %d at frontend", | 662 | xenbus_dev_fatal(dev, -EINVAL, "saw state %d at frontend", |
651 | frontend_state); | 663 | frontend_state); |
652 | break; | 664 | break; |
653 | } | 665 | } |
654 | } | 666 | } |
655 | 667 | ||
656 | 668 | ||
657 | /* ** Connection ** */ | 669 | /* ** Connection ** */ |
658 | 670 | ||
659 | 671 | ||
660 | /* | 672 | /* |
661 | * Write the physical details regarding the block device to the store, and | 673 | * Write the physical details regarding the block device to the store, and |
662 | * switch to Connected state. | 674 | * switch to Connected state. |
663 | */ | 675 | */ |
664 | static void connect(struct backend_info *be) | 676 | static void connect(struct backend_info *be) |
665 | { | 677 | { |
666 | struct xenbus_transaction xbt; | 678 | struct xenbus_transaction xbt; |
667 | int err; | 679 | int err; |
668 | struct xenbus_device *dev = be->dev; | 680 | struct xenbus_device *dev = be->dev; |
669 | 681 | ||
670 | DPRINTK("%s", dev->otherend); | 682 | DPRINTK("%s", dev->otherend); |
671 | 683 | ||
672 | /* Supply the information about the device the frontend needs */ | 684 | /* Supply the information about the device the frontend needs */ |
673 | again: | 685 | again: |
674 | err = xenbus_transaction_start(&xbt); | 686 | err = xenbus_transaction_start(&xbt); |
675 | if (err) { | 687 | if (err) { |
676 | xenbus_dev_fatal(dev, err, "starting transaction"); | 688 | xenbus_dev_fatal(dev, err, "starting transaction"); |
677 | return; | 689 | return; |
678 | } | 690 | } |
679 | 691 | ||
680 | err = xen_blkbk_flush_diskcache(xbt, be, be->blkif->vbd.flush_support); | 692 | err = xen_blkbk_flush_diskcache(xbt, be, be->blkif->vbd.flush_support); |
681 | if (err) | 693 | if (err) |
682 | goto abort; | 694 | goto abort; |
683 | 695 | ||
684 | err = xen_blkbk_discard(xbt, be); | 696 | err = xen_blkbk_discard(xbt, be); |
685 | 697 | ||
686 | /* If we can't advertise it, that is OK. */ | 698 | /* If we can't advertise it, that is OK. */ |
687 | err = xen_blkbk_barrier(xbt, be, be->blkif->vbd.flush_support); | 699 | err = xen_blkbk_barrier(xbt, be, be->blkif->vbd.flush_support); |
688 | 700 | ||
689 | err = xenbus_printf(xbt, dev->nodename, "sectors", "%llu", | 701 | err = xenbus_printf(xbt, dev->nodename, "sectors", "%llu", |
690 | (unsigned long long)vbd_sz(&be->blkif->vbd)); | 702 | (unsigned long long)vbd_sz(&be->blkif->vbd)); |
691 | if (err) { | 703 | if (err) { |
692 | xenbus_dev_fatal(dev, err, "writing %s/sectors", | 704 | xenbus_dev_fatal(dev, err, "writing %s/sectors", |
693 | dev->nodename); | 705 | dev->nodename); |
694 | goto abort; | 706 | goto abort; |
695 | } | 707 | } |
696 | 708 | ||
697 | /* FIXME: use a typename instead */ | 709 | /* FIXME: use a typename instead */ |
698 | err = xenbus_printf(xbt, dev->nodename, "info", "%u", | 710 | err = xenbus_printf(xbt, dev->nodename, "info", "%u", |
699 | be->blkif->vbd.type | | 711 | be->blkif->vbd.type | |
700 | (be->blkif->vbd.readonly ? VDISK_READONLY : 0)); | 712 | (be->blkif->vbd.readonly ? VDISK_READONLY : 0)); |
701 | if (err) { | 713 | if (err) { |
702 | xenbus_dev_fatal(dev, err, "writing %s/info", | 714 | xenbus_dev_fatal(dev, err, "writing %s/info", |
703 | dev->nodename); | 715 | dev->nodename); |
704 | goto abort; | 716 | goto abort; |
705 | } | 717 | } |
706 | err = xenbus_printf(xbt, dev->nodename, "sector-size", "%lu", | 718 | err = xenbus_printf(xbt, dev->nodename, "sector-size", "%lu", |
707 | (unsigned long) | 719 | (unsigned long) |
708 | bdev_logical_block_size(be->blkif->vbd.bdev)); | 720 | bdev_logical_block_size(be->blkif->vbd.bdev)); |
709 | if (err) { | 721 | if (err) { |
710 | xenbus_dev_fatal(dev, err, "writing %s/sector-size", | 722 | xenbus_dev_fatal(dev, err, "writing %s/sector-size", |
711 | dev->nodename); | 723 | dev->nodename); |
712 | goto abort; | 724 | goto abort; |
713 | } | 725 | } |
714 | 726 | ||
715 | err = xenbus_transaction_end(xbt, 0); | 727 | err = xenbus_transaction_end(xbt, 0); |
716 | if (err == -EAGAIN) | 728 | if (err == -EAGAIN) |
717 | goto again; | 729 | goto again; |
718 | if (err) | 730 | if (err) |
719 | xenbus_dev_fatal(dev, err, "ending transaction"); | 731 | xenbus_dev_fatal(dev, err, "ending transaction"); |
720 | 732 | ||
721 | err = xenbus_switch_state(dev, XenbusStateConnected); | 733 | err = xenbus_switch_state(dev, XenbusStateConnected); |
722 | if (err) | 734 | if (err) |
723 | xenbus_dev_fatal(dev, err, "%s: switching to Connected state", | 735 | xenbus_dev_fatal(dev, err, "%s: switching to Connected state", |
724 | dev->nodename); | 736 | dev->nodename); |
725 | 737 | ||
726 | return; | 738 | return; |
727 | abort: | 739 | abort: |
728 | xenbus_transaction_end(xbt, 1); | 740 | xenbus_transaction_end(xbt, 1); |
729 | } | 741 | } |
730 | 742 | ||
731 | 743 | ||
732 | static int connect_ring(struct backend_info *be) | 744 | static int connect_ring(struct backend_info *be) |
733 | { | 745 | { |
734 | struct xenbus_device *dev = be->dev; | 746 | struct xenbus_device *dev = be->dev; |
735 | unsigned long ring_ref; | 747 | unsigned long ring_ref; |
736 | unsigned int evtchn; | 748 | unsigned int evtchn; |
737 | char protocol[64] = ""; | 749 | char protocol[64] = ""; |
738 | int err; | 750 | int err; |
739 | 751 | ||
740 | DPRINTK("%s", dev->otherend); | 752 | DPRINTK("%s", dev->otherend); |
741 | 753 | ||
742 | err = xenbus_gather(XBT_NIL, dev->otherend, "ring-ref", "%lu", | 754 | err = xenbus_gather(XBT_NIL, dev->otherend, "ring-ref", "%lu", |
743 | &ring_ref, "event-channel", "%u", &evtchn, NULL); | 755 | &ring_ref, "event-channel", "%u", &evtchn, NULL); |
744 | if (err) { | 756 | if (err) { |
745 | xenbus_dev_fatal(dev, err, | 757 | xenbus_dev_fatal(dev, err, |
746 | "reading %s/ring-ref and event-channel", | 758 | "reading %s/ring-ref and event-channel", |
747 | dev->otherend); | 759 | dev->otherend); |
748 | return err; | 760 | return err; |
749 | } | 761 | } |
750 | 762 | ||
751 | be->blkif->blk_protocol = BLKIF_PROTOCOL_NATIVE; | 763 | be->blkif->blk_protocol = BLKIF_PROTOCOL_NATIVE; |
752 | err = xenbus_gather(XBT_NIL, dev->otherend, "protocol", | 764 | err = xenbus_gather(XBT_NIL, dev->otherend, "protocol", |
753 | "%63s", protocol, NULL); | 765 | "%63s", protocol, NULL); |
754 | if (err) | 766 | if (err) |
755 | strcpy(protocol, "unspecified, assuming native"); | 767 | strcpy(protocol, "unspecified, assuming native"); |
756 | else if (0 == strcmp(protocol, XEN_IO_PROTO_ABI_NATIVE)) | 768 | else if (0 == strcmp(protocol, XEN_IO_PROTO_ABI_NATIVE)) |
757 | be->blkif->blk_protocol = BLKIF_PROTOCOL_NATIVE; | 769 | be->blkif->blk_protocol = BLKIF_PROTOCOL_NATIVE; |
758 | else if (0 == strcmp(protocol, XEN_IO_PROTO_ABI_X86_32)) | 770 | else if (0 == strcmp(protocol, XEN_IO_PROTO_ABI_X86_32)) |
759 | be->blkif->blk_protocol = BLKIF_PROTOCOL_X86_32; | 771 | be->blkif->blk_protocol = BLKIF_PROTOCOL_X86_32; |
760 | else if (0 == strcmp(protocol, XEN_IO_PROTO_ABI_X86_64)) | 772 | else if (0 == strcmp(protocol, XEN_IO_PROTO_ABI_X86_64)) |
761 | be->blkif->blk_protocol = BLKIF_PROTOCOL_X86_64; | 773 | be->blkif->blk_protocol = BLKIF_PROTOCOL_X86_64; |
762 | else { | 774 | else { |
763 | xenbus_dev_fatal(dev, err, "unknown fe protocol %s", protocol); | 775 | xenbus_dev_fatal(dev, err, "unknown fe protocol %s", protocol); |
764 | return -1; | 776 | return -1; |
765 | } | 777 | } |
766 | pr_info(DRV_PFX "ring-ref %ld, event-channel %d, protocol %d (%s)\n", | 778 | pr_info(DRV_PFX "ring-ref %ld, event-channel %d, protocol %d (%s)\n", |
767 | ring_ref, evtchn, be->blkif->blk_protocol, protocol); | 779 | ring_ref, evtchn, be->blkif->blk_protocol, protocol); |
768 | 780 | ||
769 | /* Map the shared frame, irq etc. */ | 781 | /* Map the shared frame, irq etc. */ |
770 | err = xen_blkif_map(be->blkif, ring_ref, evtchn); | 782 | err = xen_blkif_map(be->blkif, ring_ref, evtchn); |
771 | if (err) { | 783 | if (err) { |
772 | xenbus_dev_fatal(dev, err, "mapping ring-ref %lu port %u", | 784 | xenbus_dev_fatal(dev, err, "mapping ring-ref %lu port %u", |
773 | ring_ref, evtchn); | 785 | ring_ref, evtchn); |
774 | return err; | 786 | return err; |
775 | } | 787 | } |
776 | 788 | ||
777 | return 0; | 789 | return 0; |
778 | } | 790 | } |
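For orientation, the nodes gathered here are published by the frontend during its own setup; that side of the handshake looks roughly like this (a sketch, with xbt an open transaction against the frontend's nodename and the usual error handling after each write elided):

	err = xenbus_printf(xbt, dev->nodename,
			    "ring-ref", "%u", info->ring_ref);
	err = xenbus_printf(xbt, dev->nodename,
			    "event-channel", "%u", info->evtchn);
	err = xenbus_printf(xbt, dev->nodename,
			    "protocol", "%s", XEN_IO_PROTO_ABI_NATIVE);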
779 | 791 | ||
780 | 792 | ||
781 | /* ** Driver Registration ** */ | 793 | /* ** Driver Registration ** */ |
782 | 794 | ||
783 | 795 | ||
784 | static const struct xenbus_device_id xen_blkbk_ids[] = { | 796 | static const struct xenbus_device_id xen_blkbk_ids[] = { |
785 | { "vbd" }, | 797 | { "vbd" }, |
786 | { "" } | 798 | { "" } |
787 | }; | 799 | }; |
788 | 800 | ||
789 | 801 | ||
790 | static DEFINE_XENBUS_DRIVER(xen_blkbk, , | 802 | static DEFINE_XENBUS_DRIVER(xen_blkbk, , |
791 | .probe = xen_blkbk_probe, | 803 | .probe = xen_blkbk_probe, |
792 | .remove = xen_blkbk_remove, | 804 | .remove = xen_blkbk_remove, |
793 | .otherend_changed = frontend_changed | 805 | .otherend_changed = frontend_changed |
794 | ); | 806 | ); |
795 | 807 | ||
796 | 808 | ||
797 | int xen_blkif_xenbus_init(void) | 809 | int xen_blkif_xenbus_init(void) |
798 | { | 810 | { |
799 | return xenbus_register_backend(&xen_blkbk_driver); | 811 | return xenbus_register_backend(&xen_blkbk_driver); |
800 | } | 812 | } |
801 | 813 |
drivers/block/xen-blkfront.c
1 | /* | 1 | /* |
2 | * blkfront.c | 2 | * blkfront.c |
3 | * | 3 | * |
4 | * XenLinux virtual block device driver. | 4 | * XenLinux virtual block device driver. |
5 | * | 5 | * |
6 | * Copyright (c) 2003-2004, Keir Fraser & Steve Hand | 6 | * Copyright (c) 2003-2004, Keir Fraser & Steve Hand |
7 | * Modifications by Mark A. Williamson are (c) Intel Research Cambridge | 7 | * Modifications by Mark A. Williamson are (c) Intel Research Cambridge |
8 | * Copyright (c) 2004, Christian Limpach | 8 | * Copyright (c) 2004, Christian Limpach |
9 | * Copyright (c) 2004, Andrew Warfield | 9 | * Copyright (c) 2004, Andrew Warfield |
10 | * Copyright (c) 2005, Christopher Clark | 10 | * Copyright (c) 2005, Christopher Clark |
11 | * Copyright (c) 2005, XenSource Ltd | 11 | * Copyright (c) 2005, XenSource Ltd |
12 | * | 12 | * |
13 | * This program is free software; you can redistribute it and/or | 13 | * This program is free software; you can redistribute it and/or |
14 | * modify it under the terms of the GNU General Public License version 2 | 14 | * modify it under the terms of the GNU General Public License version 2 |
15 | * as published by the Free Software Foundation; or, when distributed | 15 | * as published by the Free Software Foundation; or, when distributed |
16 | * separately from the Linux kernel or incorporated into other | 16 | * separately from the Linux kernel or incorporated into other |
17 | * software packages, subject to the following license: | 17 | * software packages, subject to the following license: |
18 | * | 18 | * |
19 | * Permission is hereby granted, free of charge, to any person obtaining a copy | 19 | * Permission is hereby granted, free of charge, to any person obtaining a copy |
20 | * of this source file (the "Software"), to deal in the Software without | 20 | * of this source file (the "Software"), to deal in the Software without |
21 | * restriction, including without limitation the rights to use, copy, modify, | 21 | * restriction, including without limitation the rights to use, copy, modify, |
22 | * merge, publish, distribute, sublicense, and/or sell copies of the Software, | 22 | * merge, publish, distribute, sublicense, and/or sell copies of the Software, |
23 | * and to permit persons to whom the Software is furnished to do so, subject to | 23 | * and to permit persons to whom the Software is furnished to do so, subject to |
24 | * the following conditions: | 24 | * the following conditions: |
25 | * | 25 | * |
26 | * The above copyright notice and this permission notice shall be included in | 26 | * The above copyright notice and this permission notice shall be included in |
27 | * all copies or substantial portions of the Software. | 27 | * all copies or substantial portions of the Software. |
28 | * | 28 | * |
29 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | 29 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
30 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | 30 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
31 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | 31 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
32 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | 32 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
33 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | 33 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING |
34 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS | 34 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS |
35 | * IN THE SOFTWARE. | 35 | * IN THE SOFTWARE. |
36 | */ | 36 | */ |
37 | 37 | ||
38 | #include <linux/interrupt.h> | 38 | #include <linux/interrupt.h> |
39 | #include <linux/blkdev.h> | 39 | #include <linux/blkdev.h> |
40 | #include <linux/hdreg.h> | 40 | #include <linux/hdreg.h> |
41 | #include <linux/cdrom.h> | 41 | #include <linux/cdrom.h> |
42 | #include <linux/module.h> | 42 | #include <linux/module.h> |
43 | #include <linux/slab.h> | 43 | #include <linux/slab.h> |
44 | #include <linux/mutex.h> | 44 | #include <linux/mutex.h> |
45 | #include <linux/scatterlist.h> | 45 | #include <linux/scatterlist.h> |
46 | 46 | ||
47 | #include <xen/xen.h> | 47 | #include <xen/xen.h> |
48 | #include <xen/xenbus.h> | 48 | #include <xen/xenbus.h> |
49 | #include <xen/grant_table.h> | 49 | #include <xen/grant_table.h> |
50 | #include <xen/events.h> | 50 | #include <xen/events.h> |
51 | #include <xen/page.h> | 51 | #include <xen/page.h> |
52 | #include <xen/platform_pci.h> | 52 | #include <xen/platform_pci.h> |
53 | 53 | ||
54 | #include <xen/interface/grant_table.h> | 54 | #include <xen/interface/grant_table.h> |
55 | #include <xen/interface/io/blkif.h> | 55 | #include <xen/interface/io/blkif.h> |
56 | #include <xen/interface/io/protocols.h> | 56 | #include <xen/interface/io/protocols.h> |
57 | 57 | ||
58 | #include <asm/xen/hypervisor.h> | 58 | #include <asm/xen/hypervisor.h> |
59 | 59 | ||
60 | enum blkif_state { | 60 | enum blkif_state { |
61 | BLKIF_STATE_DISCONNECTED, | 61 | BLKIF_STATE_DISCONNECTED, |
62 | BLKIF_STATE_CONNECTED, | 62 | BLKIF_STATE_CONNECTED, |
63 | BLKIF_STATE_SUSPENDED, | 63 | BLKIF_STATE_SUSPENDED, |
64 | }; | 64 | }; |
65 | 65 | ||
66 | struct blk_shadow { | 66 | struct blk_shadow { |
67 | struct blkif_request req; | 67 | struct blkif_request req; |
68 | struct request *request; | 68 | struct request *request; |
69 | unsigned long frame[BLKIF_MAX_SEGMENTS_PER_REQUEST]; | 69 | unsigned long frame[BLKIF_MAX_SEGMENTS_PER_REQUEST]; |
70 | }; | 70 | }; |
71 | 71 | ||
72 | static DEFINE_MUTEX(blkfront_mutex); | 72 | static DEFINE_MUTEX(blkfront_mutex); |
73 | static const struct block_device_operations xlvbd_block_fops; | 73 | static const struct block_device_operations xlvbd_block_fops; |
74 | 74 | ||
75 | #define BLK_RING_SIZE __CONST_RING_SIZE(blkif, PAGE_SIZE) | 75 | #define BLK_RING_SIZE __CONST_RING_SIZE(blkif, PAGE_SIZE) |
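__CONST_RING_SIZE(blkif, PAGE_SIZE) is a compile-time constant: the number of request/response slots that fit in one shared page, rounded down to a power of two. A sketch of the arithmetic from xen/interface/io/ring.h (the concrete figures are for x86 and given from memory):

	/*
	 * BLK_RING_SIZE ==
	 *   rounddown_pow_of_two((PAGE_SIZE - ring header)
	 *                        / sizeof(union blkif_sring_entry))
	 *
	 * With the roughly 112-byte blkif request/response union and
	 * 4 KiB pages this comes to 32 slots, so the
	 * MAXIMUM_OUTSTANDING_BLOCK_REQS below works out to
	 * 11 * 32 == 352 segments (BLKIF_MAX_SEGMENTS_PER_REQUEST
	 * being 11).
	 */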
76 | 76 | ||
77 | /* | 77 | /* |
78 | * We have one of these per vbd, whether ide, scsi or 'other'. They | 78 | * We have one of these per vbd, whether ide, scsi or 'other'. They |
79 | * hang in private_data off the gendisk structure. We may end up | 79 | * hang in private_data off the gendisk structure. We may end up |
80 | * putting all kinds of interesting stuff here :-) | 80 | * putting all kinds of interesting stuff here :-) |
81 | */ | 81 | */ |
82 | struct blkfront_info | 82 | struct blkfront_info |
83 | { | 83 | { |
84 | struct mutex mutex; | 84 | struct mutex mutex; |
85 | struct xenbus_device *xbdev; | 85 | struct xenbus_device *xbdev; |
86 | struct gendisk *gd; | 86 | struct gendisk *gd; |
87 | int vdevice; | 87 | int vdevice; |
88 | blkif_vdev_t handle; | 88 | blkif_vdev_t handle; |
89 | enum blkif_state connected; | 89 | enum blkif_state connected; |
90 | int ring_ref; | 90 | int ring_ref; |
91 | struct blkif_front_ring ring; | 91 | struct blkif_front_ring ring; |
92 | struct scatterlist sg[BLKIF_MAX_SEGMENTS_PER_REQUEST]; | 92 | struct scatterlist sg[BLKIF_MAX_SEGMENTS_PER_REQUEST]; |
93 | unsigned int evtchn, irq; | 93 | unsigned int evtchn, irq; |
94 | struct request_queue *rq; | 94 | struct request_queue *rq; |
95 | struct work_struct work; | 95 | struct work_struct work; |
96 | struct gnttab_free_callback callback; | 96 | struct gnttab_free_callback callback; |
97 | struct blk_shadow shadow[BLK_RING_SIZE]; | 97 | struct blk_shadow shadow[BLK_RING_SIZE]; |
98 | unsigned long shadow_free; | 98 | unsigned long shadow_free; |
99 | unsigned int feature_flush; | 99 | unsigned int feature_flush; |
100 | unsigned int flush_op; | 100 | unsigned int flush_op; |
101 | unsigned int feature_discard; | 101 | unsigned int feature_discard:1; |
102 | unsigned int feature_secdiscard:1; | ||
102 | unsigned int discard_granularity; | 103 | unsigned int discard_granularity; |
103 | unsigned int discard_alignment; | 104 | unsigned int discard_alignment; |
104 | int is_ready; | 105 | int is_ready; |
105 | }; | 106 | }; |
106 | 107 | ||
107 | static DEFINE_SPINLOCK(blkif_io_lock); | 108 | static DEFINE_SPINLOCK(blkif_io_lock); |
108 | 109 | ||
109 | static unsigned int nr_minors; | 110 | static unsigned int nr_minors; |
110 | static unsigned long *minors; | 111 | static unsigned long *minors; |
111 | static DEFINE_SPINLOCK(minor_lock); | 112 | static DEFINE_SPINLOCK(minor_lock); |
112 | 113 | ||
113 | #define MAXIMUM_OUTSTANDING_BLOCK_REQS \ | 114 | #define MAXIMUM_OUTSTANDING_BLOCK_REQS \ |
114 | (BLKIF_MAX_SEGMENTS_PER_REQUEST * BLK_RING_SIZE) | 115 | (BLKIF_MAX_SEGMENTS_PER_REQUEST * BLK_RING_SIZE) |
115 | #define GRANT_INVALID_REF 0 | 116 | #define GRANT_INVALID_REF 0 |
116 | 117 | ||
117 | #define PARTS_PER_DISK 16 | 118 | #define PARTS_PER_DISK 16 |
118 | #define PARTS_PER_EXT_DISK 256 | 119 | #define PARTS_PER_EXT_DISK 256 |
119 | 120 | ||
120 | #define BLKIF_MAJOR(dev) ((dev)>>8) | 121 | #define BLKIF_MAJOR(dev) ((dev)>>8) |
121 | #define BLKIF_MINOR(dev) ((dev) & 0xff) | 122 | #define BLKIF_MINOR(dev) ((dev) & 0xff) |
122 | 123 | ||
123 | #define EXT_SHIFT 28 | 124 | #define EXT_SHIFT 28 |
124 | #define EXTENDED (1<<EXT_SHIFT) | 125 | #define EXTENDED (1<<EXT_SHIFT) |
125 | #define VDEV_IS_EXTENDED(dev) ((dev)&(EXTENDED)) | 126 | #define VDEV_IS_EXTENDED(dev) ((dev)&(EXTENDED)) |
126 | #define BLKIF_MINOR_EXT(dev) ((dev)&(~EXTENDED)) | 127 | #define BLKIF_MINOR_EXT(dev) ((dev)&(~EXTENDED)) |
127 | #define EMULATED_HD_DISK_MINOR_OFFSET (0) | 128 | #define EMULATED_HD_DISK_MINOR_OFFSET (0) |
128 | #define EMULATED_HD_DISK_NAME_OFFSET (EMULATED_HD_DISK_MINOR_OFFSET / 256) | 129 | #define EMULATED_HD_DISK_NAME_OFFSET (EMULATED_HD_DISK_MINOR_OFFSET / 256) |
129 | #define EMULATED_SD_DISK_MINOR_OFFSET (0) | 130 | #define EMULATED_SD_DISK_MINOR_OFFSET (0) |
130 | #define EMULATED_SD_DISK_NAME_OFFSET (EMULATED_SD_DISK_MINOR_OFFSET / 256) | 131 | #define EMULATED_SD_DISK_NAME_OFFSET (EMULATED_SD_DISK_MINOR_OFFSET / 256) |
131 | 132 | ||
132 | #define DEV_NAME "xvd" /* name in /dev */ | 133 | #define DEV_NAME "xvd" /* name in /dev */ |
133 | 134 | ||
134 | static int get_id_from_freelist(struct blkfront_info *info) | 135 | static int get_id_from_freelist(struct blkfront_info *info) |
135 | { | 136 | { |
136 | unsigned long free = info->shadow_free; | 137 | unsigned long free = info->shadow_free; |
137 | BUG_ON(free >= BLK_RING_SIZE); | 138 | BUG_ON(free >= BLK_RING_SIZE); |
138 | info->shadow_free = info->shadow[free].req.id; | 139 | info->shadow_free = info->shadow[free].req.u.rw.id; |
139 | info->shadow[free].req.id = 0x0fffffee; /* debug */ | 140 | info->shadow[free].req.u.rw.id = 0x0fffffee; /* debug */ |
140 | return free; | 141 | return free; |
141 | } | 142 | } |
142 | 143 | ||
143 | static void add_id_to_freelist(struct blkfront_info *info, | 144 | static void add_id_to_freelist(struct blkfront_info *info, |
144 | unsigned long id) | 145 | unsigned long id) |
145 | { | 146 | { |
146 | info->shadow[id].req.id = info->shadow_free; | 147 | info->shadow[id].req.u.rw.id = info->shadow_free; |
147 | info->shadow[id].request = NULL; | 148 | info->shadow[id].request = NULL; |
148 | info->shadow_free = id; | 149 | info->shadow_free = id; |
149 | } | 150 | } |
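The pair above threads a LIFO free list through the id field of the shadow requests themselves, so no separate next-pointer array is needed. An illustrative walk-through, assuming the usual initialization (not shown in this hunk) where shadow_free == 0 and each free slot i links to i + 1:

	/*
	 * get_id_from_freelist()  -> returns 0, shadow_free becomes 1
	 *                            (slot 0's id poisoned to 0x0fffffee)
	 * get_id_from_freelist()  -> returns 1, shadow_free becomes 2
	 * add_id_to_freelist(info, 0)
	 *                         -> shadow[0].req.u.rw.id = 2,
	 *                            shadow_free = 0 (0 pushed back on top)
	 */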
150 | 151 | ||
151 | static int xlbd_reserve_minors(unsigned int minor, unsigned int nr) | 152 | static int xlbd_reserve_minors(unsigned int minor, unsigned int nr) |
152 | { | 153 | { |
153 | unsigned int end = minor + nr; | 154 | unsigned int end = minor + nr; |
154 | int rc; | 155 | int rc; |
155 | 156 | ||
156 | if (end > nr_minors) { | 157 | if (end > nr_minors) { |
157 | unsigned long *bitmap, *old; | 158 | unsigned long *bitmap, *old; |
158 | 159 | ||
159 | bitmap = kzalloc(BITS_TO_LONGS(end) * sizeof(*bitmap), | 160 | bitmap = kcalloc(BITS_TO_LONGS(end), sizeof(*bitmap), |
160 | GFP_KERNEL); | 161 | GFP_KERNEL); |
161 | if (bitmap == NULL) | 162 | if (bitmap == NULL) |
162 | return -ENOMEM; | 163 | return -ENOMEM; |
163 | 164 | ||
164 | spin_lock(&minor_lock); | 165 | spin_lock(&minor_lock); |
165 | if (end > nr_minors) { | 166 | if (end > nr_minors) { |
166 | old = minors; | 167 | old = minors; |
167 | memcpy(bitmap, minors, | 168 | memcpy(bitmap, minors, |
168 | BITS_TO_LONGS(nr_minors) * sizeof(*bitmap)); | 169 | BITS_TO_LONGS(nr_minors) * sizeof(*bitmap)); |
169 | minors = bitmap; | 170 | minors = bitmap; |
170 | nr_minors = BITS_TO_LONGS(end) * BITS_PER_LONG; | 171 | nr_minors = BITS_TO_LONGS(end) * BITS_PER_LONG; |
171 | } else | 172 | } else |
172 | old = bitmap; | 173 | old = bitmap; |
173 | spin_unlock(&minor_lock); | 174 | spin_unlock(&minor_lock); |
174 | kfree(old); | 175 | kfree(old); |
175 | } | 176 | } |
176 | 177 | ||
177 | spin_lock(&minor_lock); | 178 | spin_lock(&minor_lock); |
178 | if (find_next_bit(minors, end, minor) >= end) { | 179 | if (find_next_bit(minors, end, minor) >= end) { |
179 | for (; minor < end; ++minor) | 180 | for (; minor < end; ++minor) |
180 | __set_bit(minor, minors); | 181 | __set_bit(minor, minors); |
181 | rc = 0; | 182 | rc = 0; |
182 | } else | 183 | } else |
183 | rc = -EBUSY; | 184 | rc = -EBUSY; |
184 | spin_unlock(&minor_lock); | 185 | spin_unlock(&minor_lock); |
185 | 186 | ||
186 | return rc; | 187 | return rc; |
187 | } | 188 | } |
188 | 189 | ||
189 | static void xlbd_release_minors(unsigned int minor, unsigned int nr) | 190 | static void xlbd_release_minors(unsigned int minor, unsigned int nr) |
190 | { | 191 | { |
191 | unsigned int end = minor + nr; | 192 | unsigned int end = minor + nr; |
192 | 193 | ||
193 | BUG_ON(end > nr_minors); | 194 | BUG_ON(end > nr_minors); |
194 | spin_lock(&minor_lock); | 195 | spin_lock(&minor_lock); |
195 | for (; minor < end; ++minor) | 196 | for (; minor < end; ++minor) |
196 | __clear_bit(minor, minors); | 197 | __clear_bit(minor, minors); |
197 | spin_unlock(&minor_lock); | 198 | spin_unlock(&minor_lock); |
198 | } | 199 | } |
199 | 200 | ||
200 | static void blkif_restart_queue_callback(void *arg) | 201 | static void blkif_restart_queue_callback(void *arg) |
201 | { | 202 | { |
202 | struct blkfront_info *info = (struct blkfront_info *)arg; | 203 | struct blkfront_info *info = (struct blkfront_info *)arg; |
203 | schedule_work(&info->work); | 204 | schedule_work(&info->work); |
204 | } | 205 | } |
205 | 206 | ||
206 | static int blkif_getgeo(struct block_device *bd, struct hd_geometry *hg) | 207 | static int blkif_getgeo(struct block_device *bd, struct hd_geometry *hg) |
207 | { | 208 | { |
208 | /* We don't have real geometry info, but let's at least return | 209 | /* We don't have real geometry info, but let's at least return |
209 | values consistent with the size of the device */ | 210 | values consistent with the size of the device */ |
210 | sector_t nsect = get_capacity(bd->bd_disk); | 211 | sector_t nsect = get_capacity(bd->bd_disk); |
211 | sector_t cylinders = nsect; | 212 | sector_t cylinders = nsect; |
212 | 213 | ||
213 | hg->heads = 0xff; | 214 | hg->heads = 0xff; |
214 | hg->sectors = 0x3f; | 215 | hg->sectors = 0x3f; |
215 | sector_div(cylinders, hg->heads * hg->sectors); | 216 | sector_div(cylinders, hg->heads * hg->sectors); |
216 | hg->cylinders = cylinders; | 217 | hg->cylinders = cylinders; |
217 | if ((sector_t)(hg->cylinders + 1) * hg->heads * hg->sectors < nsect) | 218 | if ((sector_t)(hg->cylinders + 1) * hg->heads * hg->sectors < nsect) |
218 | hg->cylinders = 0xffff; | 219 | hg->cylinders = 0xffff; |
219 | return 0; | 220 | return 0; |
220 | } | 221 | } |
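As a worked example of the fake geometry: a 10 GiB disk has 20971520 sectors, so with 255 heads and 63 sectors per track it reports 20971520 / (255 * 63), about 1305 cylinders; the 0xffff clamp only triggers for disks beyond 255 * 63 * 65536 sectors, roughly 502 GiB.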
221 | 222 | ||
222 | static int blkif_ioctl(struct block_device *bdev, fmode_t mode, | 223 | static int blkif_ioctl(struct block_device *bdev, fmode_t mode, |
223 | unsigned command, unsigned long argument) | 224 | unsigned command, unsigned long argument) |
224 | { | 225 | { |
225 | struct blkfront_info *info = bdev->bd_disk->private_data; | 226 | struct blkfront_info *info = bdev->bd_disk->private_data; |
226 | int i; | 227 | int i; |
227 | 228 | ||
228 | dev_dbg(&info->xbdev->dev, "command: 0x%x, argument: 0x%lx\n", | 229 | dev_dbg(&info->xbdev->dev, "command: 0x%x, argument: 0x%lx\n", |
229 | command, (long)argument); | 230 | command, (long)argument); |
230 | 231 | ||
231 | switch (command) { | 232 | switch (command) { |
232 | case CDROMMULTISESSION: | 233 | case CDROMMULTISESSION: |
233 | dev_dbg(&info->xbdev->dev, "FIXME: support multisession CDs later\n"); | 234 | dev_dbg(&info->xbdev->dev, "FIXME: support multisession CDs later\n"); |
234 | for (i = 0; i < sizeof(struct cdrom_multisession); i++) | 235 | for (i = 0; i < sizeof(struct cdrom_multisession); i++) |
235 | if (put_user(0, (char __user *)(argument + i))) | 236 | if (put_user(0, (char __user *)(argument + i))) |
236 | return -EFAULT; | 237 | return -EFAULT; |
237 | return 0; | 238 | return 0; |
238 | 239 | ||
239 | case CDROM_GET_CAPABILITY: { | 240 | case CDROM_GET_CAPABILITY: { |
240 | struct gendisk *gd = info->gd; | 241 | struct gendisk *gd = info->gd; |
241 | if (gd->flags & GENHD_FL_CD) | 242 | if (gd->flags & GENHD_FL_CD) |
242 | return 0; | 243 | return 0; |
243 | return -EINVAL; | 244 | return -EINVAL; |
244 | } | 245 | } |
245 | 246 | ||
246 | default: | 247 | default: |
247 | /*printk(KERN_ALERT "ioctl %08x not supported by Xen blkdev\n", | 248 | /*printk(KERN_ALERT "ioctl %08x not supported by Xen blkdev\n", |
248 | command);*/ | 249 | command);*/ |
249 | return -EINVAL; /* same return as native Linux */ | 250 | return -EINVAL; /* same return as native Linux */ |
250 | } | 251 | } |
251 | 252 | ||
252 | return 0; | 253 | return 0; |
253 | } | 254 | } |
254 | 255 | ||
255 | /* | 256 | /* |
256 | * Generate a Xen blkfront IO request from a blk layer request. Reads | 257 | * Generate a Xen blkfront IO request from a blk layer request. Reads |
257 | * and writes are handled as expected. | 258 | * and writes are handled as expected. |
258 | * | 259 | * |
259 | * @req: a request struct | 260 | * @req: a request struct |
260 | */ | 261 | */ |
261 | static int blkif_queue_request(struct request *req) | 262 | static int blkif_queue_request(struct request *req) |
262 | { | 263 | { |
263 | struct blkfront_info *info = req->rq_disk->private_data; | 264 | struct blkfront_info *info = req->rq_disk->private_data; |
264 | unsigned long buffer_mfn; | 265 | unsigned long buffer_mfn; |
265 | struct blkif_request *ring_req; | 266 | struct blkif_request *ring_req; |
266 | unsigned long id; | 267 | unsigned long id; |
267 | unsigned int fsect, lsect; | 268 | unsigned int fsect, lsect; |
268 | int i, ref; | 269 | int i, ref; |
269 | grant_ref_t gref_head; | 270 | grant_ref_t gref_head; |
270 | struct scatterlist *sg; | 271 | struct scatterlist *sg; |
271 | 272 | ||
272 | if (unlikely(info->connected != BLKIF_STATE_CONNECTED)) | 273 | if (unlikely(info->connected != BLKIF_STATE_CONNECTED)) |
273 | return 1; | 274 | return 1; |
274 | 275 | ||
275 | if (gnttab_alloc_grant_references( | 276 | if (gnttab_alloc_grant_references( |
276 | BLKIF_MAX_SEGMENTS_PER_REQUEST, &gref_head) < 0) { | 277 | BLKIF_MAX_SEGMENTS_PER_REQUEST, &gref_head) < 0) { |
277 | gnttab_request_free_callback( | 278 | gnttab_request_free_callback( |
278 | &info->callback, | 279 | &info->callback, |
279 | blkif_restart_queue_callback, | 280 | blkif_restart_queue_callback, |
280 | info, | 281 | info, |
281 | BLKIF_MAX_SEGMENTS_PER_REQUEST); | 282 | BLKIF_MAX_SEGMENTS_PER_REQUEST); |
282 | return 1; | 283 | return 1; |
283 | } | 284 | } |
284 | 285 | ||
285 | /* Fill out a communications ring structure. */ | 286 | /* Fill out a communications ring structure. */ |
286 | ring_req = RING_GET_REQUEST(&info->ring, info->ring.req_prod_pvt); | 287 | ring_req = RING_GET_REQUEST(&info->ring, info->ring.req_prod_pvt); |
287 | id = get_id_from_freelist(info); | 288 | id = get_id_from_freelist(info); |
288 | info->shadow[id].request = req; | 289 | info->shadow[id].request = req; |
289 | 290 | ||
290 | ring_req->id = id; | 291 | ring_req->u.rw.id = id; |
291 | ring_req->u.rw.sector_number = (blkif_sector_t)blk_rq_pos(req); | 292 | ring_req->u.rw.sector_number = (blkif_sector_t)blk_rq_pos(req); |
292 | ring_req->handle = info->handle; | 293 | ring_req->u.rw.handle = info->handle; |
293 | 294 | ||
294 | ring_req->operation = rq_data_dir(req) ? | 295 | ring_req->operation = rq_data_dir(req) ? |
295 | BLKIF_OP_WRITE : BLKIF_OP_READ; | 296 | BLKIF_OP_WRITE : BLKIF_OP_READ; |
296 | 297 | ||
297 | if (req->cmd_flags & (REQ_FLUSH | REQ_FUA)) { | 298 | if (req->cmd_flags & (REQ_FLUSH | REQ_FUA)) { |
298 | /* | 299 | /* |
299 | * Ideally we can do an unordered flush-to-disk. In case the | 300 | * Ideally we can do an unordered flush-to-disk. In case the |
300 | * backend only supports barriers, use that. A barrier request is | 301 | * backend only supports barriers, use that. A barrier request is |
301 | * a superset of FUA, so we can implement it the same | 302 | * a superset of FUA, so we can implement it the same |
302 | * way. (It's also a FLUSH+FUA, since it is | 303 | * way. (It's also a FLUSH+FUA, since it is |
303 | * guaranteed ordered WRT previous writes.) | 304 | * guaranteed ordered WRT previous writes.) |
304 | */ | 305 | */ |
305 | ring_req->operation = info->flush_op; | 306 | ring_req->operation = info->flush_op; |
306 | } | 307 | } |
307 | 308 | ||
308 | if (unlikely(req->cmd_flags & REQ_DISCARD)) { | 309 | if (unlikely(req->cmd_flags & (REQ_DISCARD | REQ_SECURE))) { |
309 | /* id, sector_number and handle are set above. */ | 310 | /* id, sector_number and handle are set above. */ |
310 | ring_req->operation = BLKIF_OP_DISCARD; | 311 | ring_req->operation = BLKIF_OP_DISCARD; |
311 | ring_req->nr_segments = 0; | ||
312 | ring_req->u.discard.nr_sectors = blk_rq_sectors(req); | 312 | ring_req->u.discard.nr_sectors = blk_rq_sectors(req); |
313 | if ((req->cmd_flags & REQ_SECURE) && info->feature_secdiscard) | ||
314 | ring_req->u.discard.flag = BLKIF_DISCARD_SECURE; | ||
315 | else | ||
316 | ring_req->u.discard.flag = 0; | ||
313 | } else { | 317 | } else { |
314 | ring_req->nr_segments = blk_rq_map_sg(req->q, req, info->sg); | 318 | ring_req->u.rw.nr_segments = blk_rq_map_sg(req->q, req, |
315 | BUG_ON(ring_req->nr_segments > BLKIF_MAX_SEGMENTS_PER_REQUEST); | 319 | info->sg); |
320 | BUG_ON(ring_req->u.rw.nr_segments > | ||
321 | BLKIF_MAX_SEGMENTS_PER_REQUEST); | ||
316 | 322 | ||
317 | for_each_sg(info->sg, sg, ring_req->nr_segments, i) { | 323 | for_each_sg(info->sg, sg, ring_req->u.rw.nr_segments, i) { |
318 | buffer_mfn = pfn_to_mfn(page_to_pfn(sg_page(sg))); | 324 | buffer_mfn = pfn_to_mfn(page_to_pfn(sg_page(sg))); |
319 | fsect = sg->offset >> 9; | 325 | fsect = sg->offset >> 9; |
320 | lsect = fsect + (sg->length >> 9) - 1; | 326 | lsect = fsect + (sg->length >> 9) - 1; |
321 | /* install a grant reference. */ | 327 | /* install a grant reference. */ |
322 | ref = gnttab_claim_grant_reference(&gref_head); | 328 | ref = gnttab_claim_grant_reference(&gref_head); |
323 | BUG_ON(ref == -ENOSPC); | 329 | BUG_ON(ref == -ENOSPC); |
324 | 330 | ||
325 | gnttab_grant_foreign_access_ref( | 331 | gnttab_grant_foreign_access_ref( |
326 | ref, | 332 | ref, |
327 | info->xbdev->otherend_id, | 333 | info->xbdev->otherend_id, |
328 | buffer_mfn, | 334 | buffer_mfn, |
329 | rq_data_dir(req)); | 335 | rq_data_dir(req)); |
330 | 336 | ||
331 | info->shadow[id].frame[i] = mfn_to_pfn(buffer_mfn); | 337 | info->shadow[id].frame[i] = mfn_to_pfn(buffer_mfn); |
332 | ring_req->u.rw.seg[i] = | 338 | ring_req->u.rw.seg[i] = |
333 | (struct blkif_request_segment) { | 339 | (struct blkif_request_segment) { |
334 | .gref = ref, | 340 | .gref = ref, |
335 | .first_sect = fsect, | 341 | .first_sect = fsect, |
336 | .last_sect = lsect }; | 342 | .last_sect = lsect }; |
337 | } | 343 | } |
338 | } | 344 | } |
339 | 345 | ||
340 | info->ring.req_prod_pvt++; | 346 | info->ring.req_prod_pvt++; |
341 | 347 | ||
342 | /* Keep a private copy so we can reissue requests when recovering. */ | 348 | /* Keep a private copy so we can reissue requests when recovering. */ |
343 | info->shadow[id].req = *ring_req; | 349 | info->shadow[id].req = *ring_req; |
344 | 350 | ||
345 | gnttab_free_grant_references(gref_head); | 351 | gnttab_free_grant_references(gref_head); |
346 | 352 | ||
347 | return 0; | 353 | return 0; |
348 | } | 354 | } |
349 | 355 | ||
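blkif_queue_request() above converts each scatterlist segment into first/last 512-byte sector indices within its page (the >> 9 shifts). A small standalone sketch of that arithmetic, with illustrative values:

#include <stdio.h>

int main(void)
{
        unsigned int offset = 1024;             /* segment starts 1 KiB into the page */
        unsigned int length = 4096 - 1024;      /* runs to the end of a 4 KiB page */
        unsigned int fsect = offset >> 9;               /* 1024/512 = 2 */
        unsigned int lsect = fsect + (length >> 9) - 1; /* 2 + 6 - 1 = 7 */

        printf("fsect=%u lsect=%u\n", fsect, lsect);    /* sectors 2..7 */
        return 0;
}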
350 | 356 | ||
351 | static inline void flush_requests(struct blkfront_info *info) | 357 | static inline void flush_requests(struct blkfront_info *info) |
352 | { | 358 | { |
353 | int notify; | 359 | int notify; |
354 | 360 | ||
355 | RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&info->ring, notify); | 361 | RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&info->ring, notify); |
356 | 362 | ||
357 | if (notify) | 363 | if (notify) |
358 | notify_remote_via_irq(info->irq); | 364 | notify_remote_via_irq(info->irq); |
359 | } | 365 | } |
360 | 366 | ||
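flush_requests() publishes the private producer index and only raises the event channel when the backend asked to be woken for one of the newly pushed slots. Below is a userspace paraphrase of the test hidden inside RING_PUSH_REQUESTS_AND_CHECK_NOTIFY — a sketch of the ring-macro logic with the memory barriers omitted, not the header itself:

#include <stdio.h>

typedef unsigned int RING_IDX;

static int need_notify(RING_IDX old_prod, RING_IDX new_prod, RING_IDX req_event)
{
        /* Unsigned wraparound makes this an "is req_event in (old, new]" test. */
        return (RING_IDX)(new_prod - req_event) < (RING_IDX)(new_prod - old_prod);
}

int main(void)
{
        printf("%d\n", need_notify(10, 12, 11)); /* event inside the batch -> 1 */
        printf("%d\n", need_notify(10, 12, 20)); /* backend not waiting    -> 0 */
        return 0;
}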
361 | /* | 367 | /* |
362 | * do_blkif_request | 368 | * do_blkif_request |
363 | * read a block; request is in a request queue | 369 | * read a block; request is in a request queue |
364 | */ | 370 | */ |
365 | static void do_blkif_request(struct request_queue *rq) | 371 | static void do_blkif_request(struct request_queue *rq) |
366 | { | 372 | { |
367 | struct blkfront_info *info = NULL; | 373 | struct blkfront_info *info = NULL; |
368 | struct request *req; | 374 | struct request *req; |
369 | int queued; | 375 | int queued; |
370 | 376 | ||
371 | pr_debug("Entered do_blkif_request\n"); | 377 | pr_debug("Entered do_blkif_request\n"); |
372 | 378 | ||
373 | queued = 0; | 379 | queued = 0; |
374 | 380 | ||
375 | while ((req = blk_peek_request(rq)) != NULL) { | 381 | while ((req = blk_peek_request(rq)) != NULL) { |
376 | info = req->rq_disk->private_data; | 382 | info = req->rq_disk->private_data; |
377 | 383 | ||
378 | if (RING_FULL(&info->ring)) | 384 | if (RING_FULL(&info->ring)) |
379 | goto wait; | 385 | goto wait; |
380 | 386 | ||
381 | blk_start_request(req); | 387 | blk_start_request(req); |
382 | 388 | ||
383 | if ((req->cmd_type != REQ_TYPE_FS) || | 389 | if ((req->cmd_type != REQ_TYPE_FS) || |
384 | ((req->cmd_flags & (REQ_FLUSH | REQ_FUA)) && | 390 | ((req->cmd_flags & (REQ_FLUSH | REQ_FUA)) && |
385 | !info->flush_op)) { | 391 | !info->flush_op)) { |
386 | __blk_end_request_all(req, -EIO); | 392 | __blk_end_request_all(req, -EIO); |
387 | continue; | 393 | continue; |
388 | } | 394 | } |
389 | 395 | ||
390 | pr_debug("do_blk_req %p: cmd %p, sec %lx, " | 396 | pr_debug("do_blk_req %p: cmd %p, sec %lx, " |
391 | "(%u/%u) buffer:%p [%s]\n", | 397 | "(%u/%u) buffer:%p [%s]\n", |
392 | req, req->cmd, (unsigned long)blk_rq_pos(req), | 398 | req, req->cmd, (unsigned long)blk_rq_pos(req), |
393 | blk_rq_cur_sectors(req), blk_rq_sectors(req), | 399 | blk_rq_cur_sectors(req), blk_rq_sectors(req), |
394 | req->buffer, rq_data_dir(req) ? "write" : "read"); | 400 | req->buffer, rq_data_dir(req) ? "write" : "read"); |
395 | 401 | ||
396 | if (blkif_queue_request(req)) { | 402 | if (blkif_queue_request(req)) { |
397 | blk_requeue_request(rq, req); | 403 | blk_requeue_request(rq, req); |
398 | wait: | 404 | wait: |
399 | /* Avoid pointless unplugs. */ | 405 | /* Avoid pointless unplugs. */ |
400 | blk_stop_queue(rq); | 406 | blk_stop_queue(rq); |
401 | break; | 407 | break; |
402 | } | 408 | } |
403 | 409 | ||
404 | queued++; | 410 | queued++; |
405 | } | 411 | } |
406 | 412 | ||
407 | if (queued != 0) | 413 | if (queued != 0) |
408 | flush_requests(info); | 414 | flush_requests(info); |
409 | } | 415 | } |
410 | 416 | ||
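The loop above fails two kinds of requests outright with -EIO rather than queueing them: anything that is not a filesystem request, and flush/FUA requests when the backend advertised no flush operation. A hedged sketch of that predicate (the flag values are made up for illustration, not the kernel's):

#include <stdio.h>

#define REQ_FLUSH (1u << 0)     /* illustrative values only */
#define REQ_FUA   (1u << 1)

static int must_fail(int is_fs_request, unsigned int cmd_flags, int flush_op)
{
        if (!is_fs_request)
                return 1;
        if ((cmd_flags & (REQ_FLUSH | REQ_FUA)) && !flush_op)
                return 1;
        return 0;
}

int main(void)
{
        printf("%d\n", must_fail(1, REQ_FUA, 0));  /* FUA, no flush op -> 1 */
        printf("%d\n", must_fail(1, REQ_FUA, 42)); /* backend has one  -> 0 */
        printf("%d\n", must_fail(0, 0, 42));       /* not a FS request -> 1 */
        return 0;
}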
411 | static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size) | 417 | static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size) |
412 | { | 418 | { |
413 | struct request_queue *rq; | 419 | struct request_queue *rq; |
414 | struct blkfront_info *info = gd->private_data; | 420 | struct blkfront_info *info = gd->private_data; |
415 | 421 | ||
416 | rq = blk_init_queue(do_blkif_request, &blkif_io_lock); | 422 | rq = blk_init_queue(do_blkif_request, &blkif_io_lock); |
417 | if (rq == NULL) | 423 | if (rq == NULL) |
418 | return -1; | 424 | return -1; |
419 | 425 | ||
420 | queue_flag_set_unlocked(QUEUE_FLAG_VIRT, rq); | 426 | queue_flag_set_unlocked(QUEUE_FLAG_VIRT, rq); |
421 | 427 | ||
422 | if (info->feature_discard) { | 428 | if (info->feature_discard) { |
423 | queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, rq); | 429 | queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, rq); |
424 | blk_queue_max_discard_sectors(rq, get_capacity(gd)); | 430 | blk_queue_max_discard_sectors(rq, get_capacity(gd)); |
425 | rq->limits.discard_granularity = info->discard_granularity; | 431 | rq->limits.discard_granularity = info->discard_granularity; |
426 | rq->limits.discard_alignment = info->discard_alignment; | 432 | rq->limits.discard_alignment = info->discard_alignment; |
433 | if (info->feature_secdiscard) | ||
434 | queue_flag_set_unlocked(QUEUE_FLAG_SECDISCARD, rq); | ||
427 | } | 435 | } |
428 | 436 | ||
429 | /* Hard sector size and max sectors impersonate the equiv. hardware. */ | 437 | /* Hard sector size and max sectors impersonate the equiv. hardware. */ |
430 | blk_queue_logical_block_size(rq, sector_size); | 438 | blk_queue_logical_block_size(rq, sector_size); |
431 | blk_queue_max_hw_sectors(rq, 512); | 439 | blk_queue_max_hw_sectors(rq, 512); |
432 | 440 | ||
433 | /* Each segment in a request is up to an aligned page in size. */ | 441 | /* Each segment in a request is up to an aligned page in size. */ |
434 | blk_queue_segment_boundary(rq, PAGE_SIZE - 1); | 442 | blk_queue_segment_boundary(rq, PAGE_SIZE - 1); |
435 | blk_queue_max_segment_size(rq, PAGE_SIZE); | 443 | blk_queue_max_segment_size(rq, PAGE_SIZE); |
436 | 444 | ||
437 | /* Ensure a merged request will fit in a single I/O ring slot. */ | 445 | /* Ensure a merged request will fit in a single I/O ring slot. */ |
438 | blk_queue_max_segments(rq, BLKIF_MAX_SEGMENTS_PER_REQUEST); | 446 | blk_queue_max_segments(rq, BLKIF_MAX_SEGMENTS_PER_REQUEST); |
439 | 447 | ||
440 | /* Make sure buffer addresses are sector-aligned. */ | 448 | /* Make sure buffer addresses are sector-aligned. */ |
441 | blk_queue_dma_alignment(rq, 511); | 449 | blk_queue_dma_alignment(rq, 511); |
442 | 450 | ||
443 | /* Make sure we don't use bounce buffers. */ | 451 | /* Make sure we don't use bounce buffers. */ |
444 | blk_queue_bounce_limit(rq, BLK_BOUNCE_ANY); | 452 | blk_queue_bounce_limit(rq, BLK_BOUNCE_ANY); |
445 | 453 | ||
446 | gd->queue = rq; | 454 | gd->queue = rq; |
447 | 455 | ||
448 | return 0; | 456 | return 0; |
449 | } | 457 | } |
450 | 458 | ||
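xlvbd_init_blk_queue() caps each segment at one aligned page so a merged request always fits a single ring slot. The boundary-mask test the block layer applies for blk_queue_segment_boundary(rq, PAGE_SIZE - 1) is sketched below, assuming a 4 KiB page:

#include <stdio.h>

#define PAGE_SIZE 4096UL        /* assumed for illustration */

static int crosses_boundary(unsigned long addr, unsigned long len)
{
        unsigned long mask = PAGE_SIZE - 1;
        /* A segment is legal only if its first and last byte share a page. */
        return (addr & ~mask) != ((addr + len - 1) & ~mask);
}

int main(void)
{
        printf("%d\n", crosses_boundary(0x1000, 4096)); /* exactly one page -> 0 */
        printf("%d\n", crosses_boundary(0x1800, 4096)); /* straddles pages  -> 1 */
        return 0;
}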
451 | 459 | ||
452 | static void xlvbd_flush(struct blkfront_info *info) | 460 | static void xlvbd_flush(struct blkfront_info *info) |
453 | { | 461 | { |
454 | blk_queue_flush(info->rq, info->feature_flush); | 462 | blk_queue_flush(info->rq, info->feature_flush); |
455 | printk(KERN_INFO "blkfront: %s: %s: %s\n", | 463 | printk(KERN_INFO "blkfront: %s: %s: %s\n", |
456 | info->gd->disk_name, | 464 | info->gd->disk_name, |
457 | info->flush_op == BLKIF_OP_WRITE_BARRIER ? | 465 | info->flush_op == BLKIF_OP_WRITE_BARRIER ? |
458 | "barrier" : (info->flush_op == BLKIF_OP_FLUSH_DISKCACHE ? | 466 | "barrier" : (info->flush_op == BLKIF_OP_FLUSH_DISKCACHE ? |
459 | "flush diskcache" : "barrier or flush"), | 467 | "flush diskcache" : "barrier or flush"), |
460 | info->feature_flush ? "enabled" : "disabled"); | 468 | info->feature_flush ? "enabled" : "disabled"); |
461 | } | 469 | } |
462 | 470 | ||
463 | static int xen_translate_vdev(int vdevice, int *minor, unsigned int *offset) | 471 | static int xen_translate_vdev(int vdevice, int *minor, unsigned int *offset) |
464 | { | 472 | { |
465 | int major; | 473 | int major; |
466 | major = BLKIF_MAJOR(vdevice); | 474 | major = BLKIF_MAJOR(vdevice); |
467 | *minor = BLKIF_MINOR(vdevice); | 475 | *minor = BLKIF_MINOR(vdevice); |
468 | switch (major) { | 476 | switch (major) { |
469 | case XEN_IDE0_MAJOR: | 477 | case XEN_IDE0_MAJOR: |
470 | *offset = (*minor / 64) + EMULATED_HD_DISK_NAME_OFFSET; | 478 | *offset = (*minor / 64) + EMULATED_HD_DISK_NAME_OFFSET; |
471 | *minor = ((*minor / 64) * PARTS_PER_DISK) + | 479 | *minor = ((*minor / 64) * PARTS_PER_DISK) + |
472 | EMULATED_HD_DISK_MINOR_OFFSET; | 480 | EMULATED_HD_DISK_MINOR_OFFSET; |
473 | break; | 481 | break; |
474 | case XEN_IDE1_MAJOR: | 482 | case XEN_IDE1_MAJOR: |
475 | *offset = (*minor / 64) + 2 + EMULATED_HD_DISK_NAME_OFFSET; | 483 | *offset = (*minor / 64) + 2 + EMULATED_HD_DISK_NAME_OFFSET; |
476 | *minor = (((*minor / 64) + 2) * PARTS_PER_DISK) + | 484 | *minor = (((*minor / 64) + 2) * PARTS_PER_DISK) + |
477 | EMULATED_HD_DISK_MINOR_OFFSET; | 485 | EMULATED_HD_DISK_MINOR_OFFSET; |
478 | break; | 486 | break; |
479 | case XEN_SCSI_DISK0_MAJOR: | 487 | case XEN_SCSI_DISK0_MAJOR: |
480 | *offset = (*minor / PARTS_PER_DISK) + EMULATED_SD_DISK_NAME_OFFSET; | 488 | *offset = (*minor / PARTS_PER_DISK) + EMULATED_SD_DISK_NAME_OFFSET; |
481 | *minor = *minor + EMULATED_SD_DISK_MINOR_OFFSET; | 489 | *minor = *minor + EMULATED_SD_DISK_MINOR_OFFSET; |
482 | break; | 490 | break; |
483 | case XEN_SCSI_DISK1_MAJOR: | 491 | case XEN_SCSI_DISK1_MAJOR: |
484 | case XEN_SCSI_DISK2_MAJOR: | 492 | case XEN_SCSI_DISK2_MAJOR: |
485 | case XEN_SCSI_DISK3_MAJOR: | 493 | case XEN_SCSI_DISK3_MAJOR: |
486 | case XEN_SCSI_DISK4_MAJOR: | 494 | case XEN_SCSI_DISK4_MAJOR: |
487 | case XEN_SCSI_DISK5_MAJOR: | 495 | case XEN_SCSI_DISK5_MAJOR: |
488 | case XEN_SCSI_DISK6_MAJOR: | 496 | case XEN_SCSI_DISK6_MAJOR: |
489 | case XEN_SCSI_DISK7_MAJOR: | 497 | case XEN_SCSI_DISK7_MAJOR: |
490 | *offset = (*minor / PARTS_PER_DISK) + | 498 | *offset = (*minor / PARTS_PER_DISK) + |
491 | ((major - XEN_SCSI_DISK1_MAJOR + 1) * 16) + | 499 | ((major - XEN_SCSI_DISK1_MAJOR + 1) * 16) + |
492 | EMULATED_SD_DISK_NAME_OFFSET; | 500 | EMULATED_SD_DISK_NAME_OFFSET; |
493 | *minor = *minor + | 501 | *minor = *minor + |
494 | ((major - XEN_SCSI_DISK1_MAJOR + 1) * 16 * PARTS_PER_DISK) + | 502 | ((major - XEN_SCSI_DISK1_MAJOR + 1) * 16 * PARTS_PER_DISK) + |
495 | EMULATED_SD_DISK_MINOR_OFFSET; | 503 | EMULATED_SD_DISK_MINOR_OFFSET; |
496 | break; | 504 | break; |
497 | case XEN_SCSI_DISK8_MAJOR: | 505 | case XEN_SCSI_DISK8_MAJOR: |
498 | case XEN_SCSI_DISK9_MAJOR: | 506 | case XEN_SCSI_DISK9_MAJOR: |
499 | case XEN_SCSI_DISK10_MAJOR: | 507 | case XEN_SCSI_DISK10_MAJOR: |
500 | case XEN_SCSI_DISK11_MAJOR: | 508 | case XEN_SCSI_DISK11_MAJOR: |
501 | case XEN_SCSI_DISK12_MAJOR: | 509 | case XEN_SCSI_DISK12_MAJOR: |
502 | case XEN_SCSI_DISK13_MAJOR: | 510 | case XEN_SCSI_DISK13_MAJOR: |
503 | case XEN_SCSI_DISK14_MAJOR: | 511 | case XEN_SCSI_DISK14_MAJOR: |
504 | case XEN_SCSI_DISK15_MAJOR: | 512 | case XEN_SCSI_DISK15_MAJOR: |
505 | *offset = (*minor / PARTS_PER_DISK) + | 513 | *offset = (*minor / PARTS_PER_DISK) + |
506 | ((major - XEN_SCSI_DISK8_MAJOR + 8) * 16) + | 514 | ((major - XEN_SCSI_DISK8_MAJOR + 8) * 16) + |
507 | EMULATED_SD_DISK_NAME_OFFSET; | 515 | EMULATED_SD_DISK_NAME_OFFSET; |
508 | *minor = *minor + | 516 | *minor = *minor + |
509 | ((major - XEN_SCSI_DISK8_MAJOR + 8) * 16 * PARTS_PER_DISK) + | 517 | ((major - XEN_SCSI_DISK8_MAJOR + 8) * 16 * PARTS_PER_DISK) + |
510 | EMULATED_SD_DISK_MINOR_OFFSET; | 518 | EMULATED_SD_DISK_MINOR_OFFSET; |
511 | break; | 519 | break; |
512 | case XENVBD_MAJOR: | 520 | case XENVBD_MAJOR: |
513 | *offset = *minor / PARTS_PER_DISK; | 521 | *offset = *minor / PARTS_PER_DISK; |
514 | break; | 522 | break; |
515 | default: | 523 | default: |
516 | printk(KERN_WARNING "blkfront: your disk configuration is " | 524 | printk(KERN_WARNING "blkfront: your disk configuration is " |
517 | "incorrect, please use an xvd device instead\n"); | 525 | "incorrect, please use an xvd device instead\n"); |
518 | return -ENODEV; | 526 | return -ENODEV; |
519 | } | 527 | } |
520 | return 0; | 528 | return 0; |
521 | } | 529 | } |
522 | 530 | ||
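For the emulated IDE majors above, a vdevice's minor encodes disk = minor/64 and partition = minor%64; the translation remaps each disk onto a PARTS_PER_DISK-sized range of xvd minors. An illustrative run of the XEN_IDE0_MAJOR arm — PARTS_PER_DISK is assumed to be 16 and the two offset constants are set to 0 purely for illustration; the driver's real values may differ:

#include <stdio.h>

#define PARTS_PER_DISK 16                       /* assumed */
#define EMULATED_HD_DISK_NAME_OFFSET  0         /* illustrative */
#define EMULATED_HD_DISK_MINOR_OFFSET 0         /* illustrative */

int main(void)
{
        int minor = 65;                         /* hdb, partition 1 */
        unsigned int offset = (minor / 64) + EMULATED_HD_DISK_NAME_OFFSET;
        int new_minor = ((minor / 64) * PARTS_PER_DISK) +
                        EMULATED_HD_DISK_MINOR_OFFSET;

        printf("disk index %u, remapped first minor %d\n", offset, new_minor);
        return 0;
}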
523 | static int xlvbd_alloc_gendisk(blkif_sector_t capacity, | 531 | static int xlvbd_alloc_gendisk(blkif_sector_t capacity, |
524 | struct blkfront_info *info, | 532 | struct blkfront_info *info, |
525 | u16 vdisk_info, u16 sector_size) | 533 | u16 vdisk_info, u16 sector_size) |
526 | { | 534 | { |
527 | struct gendisk *gd; | 535 | struct gendisk *gd; |
528 | int nr_minors = 1; | 536 | int nr_minors = 1; |
529 | int err; | 537 | int err; |
530 | unsigned int offset; | 538 | unsigned int offset; |
531 | int minor; | 539 | int minor; |
532 | int nr_parts; | 540 | int nr_parts; |
533 | 541 | ||
534 | BUG_ON(info->gd != NULL); | 542 | BUG_ON(info->gd != NULL); |
535 | BUG_ON(info->rq != NULL); | 543 | BUG_ON(info->rq != NULL); |
536 | 544 | ||
537 | if ((info->vdevice>>EXT_SHIFT) > 1) { | 545 | if ((info->vdevice>>EXT_SHIFT) > 1) { |
538 | /* this is above the extended range; something is wrong */ | 546 | /* this is above the extended range; something is wrong */ |
539 | printk(KERN_WARNING "blkfront: vdevice 0x%x is above the extended range; ignoring\n", info->vdevice); | 547 | printk(KERN_WARNING "blkfront: vdevice 0x%x is above the extended range; ignoring\n", info->vdevice); |
540 | return -ENODEV; | 548 | return -ENODEV; |
541 | } | 549 | } |
542 | 550 | ||
543 | if (!VDEV_IS_EXTENDED(info->vdevice)) { | 551 | if (!VDEV_IS_EXTENDED(info->vdevice)) { |
544 | err = xen_translate_vdev(info->vdevice, &minor, &offset); | 552 | err = xen_translate_vdev(info->vdevice, &minor, &offset); |
545 | if (err) | 553 | if (err) |
546 | return err; | 554 | return err; |
547 | nr_parts = PARTS_PER_DISK; | 555 | nr_parts = PARTS_PER_DISK; |
548 | } else { | 556 | } else { |
549 | minor = BLKIF_MINOR_EXT(info->vdevice); | 557 | minor = BLKIF_MINOR_EXT(info->vdevice); |
550 | nr_parts = PARTS_PER_EXT_DISK; | 558 | nr_parts = PARTS_PER_EXT_DISK; |
551 | offset = minor / nr_parts; | 559 | offset = minor / nr_parts; |
552 | if (xen_hvm_domain() && offset < EMULATED_HD_DISK_NAME_OFFSET + 4) | 560 | if (xen_hvm_domain() && offset < EMULATED_HD_DISK_NAME_OFFSET + 4) |
553 | printk(KERN_WARNING "blkfront: vdevice 0x%x might conflict with " | 561 | printk(KERN_WARNING "blkfront: vdevice 0x%x might conflict with " |
554 | "emulated IDE disks,\n\t choose an xvd device name" | 562 | "emulated IDE disks,\n\t choose an xvd device name" |
555 | "from xvde on\n", info->vdevice); | 563 | "from xvde on\n", info->vdevice); |
556 | } | 564 | } |
557 | err = -ENODEV; | 565 | err = -ENODEV; |
558 | 566 | ||
559 | if ((minor % nr_parts) == 0) | 567 | if ((minor % nr_parts) == 0) |
560 | nr_minors = nr_parts; | 568 | nr_minors = nr_parts; |
561 | 569 | ||
562 | err = xlbd_reserve_minors(minor, nr_minors); | 570 | err = xlbd_reserve_minors(minor, nr_minors); |
563 | if (err) | 571 | if (err) |
564 | goto out; | 572 | goto out; |
565 | err = -ENODEV; | 573 | err = -ENODEV; |
566 | 574 | ||
567 | gd = alloc_disk(nr_minors); | 575 | gd = alloc_disk(nr_minors); |
568 | if (gd == NULL) | 576 | if (gd == NULL) |
569 | goto release; | 577 | goto release; |
570 | 578 | ||
571 | if (nr_minors > 1) { | 579 | if (nr_minors > 1) { |
572 | if (offset < 26) | 580 | if (offset < 26) |
573 | sprintf(gd->disk_name, "%s%c", DEV_NAME, 'a' + offset); | 581 | sprintf(gd->disk_name, "%s%c", DEV_NAME, 'a' + offset); |
574 | else | 582 | else |
575 | sprintf(gd->disk_name, "%s%c%c", DEV_NAME, | 583 | sprintf(gd->disk_name, "%s%c%c", DEV_NAME, |
576 | 'a' + ((offset / 26)-1), 'a' + (offset % 26)); | 584 | 'a' + ((offset / 26)-1), 'a' + (offset % 26)); |
577 | } else { | 585 | } else { |
578 | if (offset < 26) | 586 | if (offset < 26) |
579 | sprintf(gd->disk_name, "%s%c%d", DEV_NAME, | 587 | sprintf(gd->disk_name, "%s%c%d", DEV_NAME, |
580 | 'a' + offset, | 588 | 'a' + offset, |
581 | minor & (nr_parts - 1)); | 589 | minor & (nr_parts - 1)); |
582 | else | 590 | else |
583 | sprintf(gd->disk_name, "%s%c%c%d", DEV_NAME, | 591 | sprintf(gd->disk_name, "%s%c%c%d", DEV_NAME, |
584 | 'a' + ((offset / 26) - 1), | 592 | 'a' + ((offset / 26) - 1), |
585 | 'a' + (offset % 26), | 593 | 'a' + (offset % 26), |
586 | minor & (nr_parts - 1)); | 594 | minor & (nr_parts - 1)); |
587 | } | 595 | } |
588 | 596 | ||
589 | gd->major = XENVBD_MAJOR; | 597 | gd->major = XENVBD_MAJOR; |
590 | gd->first_minor = minor; | 598 | gd->first_minor = minor; |
591 | gd->fops = &xlvbd_block_fops; | 599 | gd->fops = &xlvbd_block_fops; |
592 | gd->private_data = info; | 600 | gd->private_data = info; |
593 | gd->driverfs_dev = &(info->xbdev->dev); | 601 | gd->driverfs_dev = &(info->xbdev->dev); |
594 | set_capacity(gd, capacity); | 602 | set_capacity(gd, capacity); |
595 | 603 | ||
596 | if (xlvbd_init_blk_queue(gd, sector_size)) { | 604 | if (xlvbd_init_blk_queue(gd, sector_size)) { |
597 | del_gendisk(gd); | 605 | del_gendisk(gd); |
598 | goto release; | 606 | goto release; |
599 | } | 607 | } |
600 | 608 | ||
601 | info->rq = gd->queue; | 609 | info->rq = gd->queue; |
602 | info->gd = gd; | 610 | info->gd = gd; |
603 | 611 | ||
604 | xlvbd_flush(info); | 612 | xlvbd_flush(info); |
605 | 613 | ||
606 | if (vdisk_info & VDISK_READONLY) | 614 | if (vdisk_info & VDISK_READONLY) |
607 | set_disk_ro(gd, 1); | 615 | set_disk_ro(gd, 1); |
608 | 616 | ||
609 | if (vdisk_info & VDISK_REMOVABLE) | 617 | if (vdisk_info & VDISK_REMOVABLE) |
610 | gd->flags |= GENHD_FL_REMOVABLE; | 618 | gd->flags |= GENHD_FL_REMOVABLE; |
611 | 619 | ||
612 | if (vdisk_info & VDISK_CDROM) | 620 | if (vdisk_info & VDISK_CDROM) |
613 | gd->flags |= GENHD_FL_CD; | 621 | gd->flags |= GENHD_FL_CD; |
614 | 622 | ||
615 | return 0; | 623 | return 0; |
616 | 624 | ||
617 | release: | 625 | release: |
618 | xlbd_release_minors(minor, nr_minors); | 626 | xlbd_release_minors(minor, nr_minors); |
619 | out: | 627 | out: |
620 | return err; | 628 | return err; |
621 | } | 629 | } |
622 | 630 | ||
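The sprintf logic above yields the familiar xvd naming: whole disks 0-25 become xvda..xvdz, later disks get two letters (xvdaa, xvdab, ...), and partition minors have the partition number appended. A standalone sketch of the whole-disk case, assuming DEV_NAME is "xvd":

#include <stdio.h>

#define DEV_NAME "xvd"

static void disk_name(unsigned int offset, char *buf)
{
        if (offset < 26)
                sprintf(buf, "%s%c", DEV_NAME, 'a' + offset);
        else
                sprintf(buf, "%s%c%c", DEV_NAME,
                        'a' + ((offset / 26) - 1), 'a' + (offset % 26));
}

int main(void)
{
        char buf[8];

        disk_name(0, buf);  printf("%s\n", buf);        /* xvda  */
        disk_name(25, buf); printf("%s\n", buf);        /* xvdz  */
        disk_name(26, buf); printf("%s\n", buf);        /* xvdaa */
        return 0;
}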
623 | static void xlvbd_release_gendisk(struct blkfront_info *info) | 631 | static void xlvbd_release_gendisk(struct blkfront_info *info) |
624 | { | 632 | { |
625 | unsigned int minor, nr_minors; | 633 | unsigned int minor, nr_minors; |
626 | unsigned long flags; | 634 | unsigned long flags; |
627 | 635 | ||
628 | if (info->rq == NULL) | 636 | if (info->rq == NULL) |
629 | return; | 637 | return; |
630 | 638 | ||
631 | spin_lock_irqsave(&blkif_io_lock, flags); | 639 | spin_lock_irqsave(&blkif_io_lock, flags); |
632 | 640 | ||
633 | /* No more blkif_request(). */ | 641 | /* No more blkif_request(). */ |
634 | blk_stop_queue(info->rq); | 642 | blk_stop_queue(info->rq); |
635 | 643 | ||
636 | /* No more gnttab callback work. */ | 644 | /* No more gnttab callback work. */ |
637 | gnttab_cancel_free_callback(&info->callback); | 645 | gnttab_cancel_free_callback(&info->callback); |
638 | spin_unlock_irqrestore(&blkif_io_lock, flags); | 646 | spin_unlock_irqrestore(&blkif_io_lock, flags); |
639 | 647 | ||
640 | /* Flush gnttab callback work. Must be done with no locks held. */ | 648 | /* Flush gnttab callback work. Must be done with no locks held. */ |
641 | flush_work_sync(&info->work); | 649 | flush_work_sync(&info->work); |
642 | 650 | ||
643 | del_gendisk(info->gd); | 651 | del_gendisk(info->gd); |
644 | 652 | ||
645 | minor = info->gd->first_minor; | 653 | minor = info->gd->first_minor; |
646 | nr_minors = info->gd->minors; | 654 | nr_minors = info->gd->minors; |
647 | xlbd_release_minors(minor, nr_minors); | 655 | xlbd_release_minors(minor, nr_minors); |
648 | 656 | ||
649 | blk_cleanup_queue(info->rq); | 657 | blk_cleanup_queue(info->rq); |
650 | info->rq = NULL; | 658 | info->rq = NULL; |
651 | 659 | ||
652 | put_disk(info->gd); | 660 | put_disk(info->gd); |
653 | info->gd = NULL; | 661 | info->gd = NULL; |
654 | } | 662 | } |
655 | 663 | ||
656 | static void kick_pending_request_queues(struct blkfront_info *info) | 664 | static void kick_pending_request_queues(struct blkfront_info *info) |
657 | { | 665 | { |
658 | if (!RING_FULL(&info->ring)) { | 666 | if (!RING_FULL(&info->ring)) { |
659 | /* Re-enable calldowns. */ | 667 | /* Re-enable calldowns. */ |
660 | blk_start_queue(info->rq); | 668 | blk_start_queue(info->rq); |
661 | /* Kick things off immediately. */ | 669 | /* Kick things off immediately. */ |
662 | do_blkif_request(info->rq); | 670 | do_blkif_request(info->rq); |
663 | } | 671 | } |
664 | } | 672 | } |
665 | 673 | ||
666 | static void blkif_restart_queue(struct work_struct *work) | 674 | static void blkif_restart_queue(struct work_struct *work) |
667 | { | 675 | { |
668 | struct blkfront_info *info = container_of(work, struct blkfront_info, work); | 676 | struct blkfront_info *info = container_of(work, struct blkfront_info, work); |
669 | 677 | ||
670 | spin_lock_irq(&blkif_io_lock); | 678 | spin_lock_irq(&blkif_io_lock); |
671 | if (info->connected == BLKIF_STATE_CONNECTED) | 679 | if (info->connected == BLKIF_STATE_CONNECTED) |
672 | kick_pending_request_queues(info); | 680 | kick_pending_request_queues(info); |
673 | spin_unlock_irq(&blkif_io_lock); | 681 | spin_unlock_irq(&blkif_io_lock); |
674 | } | 682 | } |
675 | 683 | ||
676 | static void blkif_free(struct blkfront_info *info, int suspend) | 684 | static void blkif_free(struct blkfront_info *info, int suspend) |
677 | { | 685 | { |
678 | /* Prevent new requests being issued until we fix things up. */ | 686 | /* Prevent new requests being issued until we fix things up. */ |
679 | spin_lock_irq(&blkif_io_lock); | 687 | spin_lock_irq(&blkif_io_lock); |
680 | info->connected = suspend ? | 688 | info->connected = suspend ? |
681 | BLKIF_STATE_SUSPENDED : BLKIF_STATE_DISCONNECTED; | 689 | BLKIF_STATE_SUSPENDED : BLKIF_STATE_DISCONNECTED; |
682 | /* No more blkif_request(). */ | 690 | /* No more blkif_request(). */ |
683 | if (info->rq) | 691 | if (info->rq) |
684 | blk_stop_queue(info->rq); | 692 | blk_stop_queue(info->rq); |
685 | /* No more gnttab callback work. */ | 693 | /* No more gnttab callback work. */ |
686 | gnttab_cancel_free_callback(&info->callback); | 694 | gnttab_cancel_free_callback(&info->callback); |
687 | spin_unlock_irq(&blkif_io_lock); | 695 | spin_unlock_irq(&blkif_io_lock); |
688 | 696 | ||
689 | /* Flush gnttab callback work. Must be done with no locks held. */ | 697 | /* Flush gnttab callback work. Must be done with no locks held. */ |
690 | flush_work_sync(&info->work); | 698 | flush_work_sync(&info->work); |
691 | 699 | ||
692 | /* Free resources associated with old device channel. */ | 700 | /* Free resources associated with old device channel. */ |
693 | if (info->ring_ref != GRANT_INVALID_REF) { | 701 | if (info->ring_ref != GRANT_INVALID_REF) { |
694 | gnttab_end_foreign_access(info->ring_ref, 0, | 702 | gnttab_end_foreign_access(info->ring_ref, 0, |
695 | (unsigned long)info->ring.sring); | 703 | (unsigned long)info->ring.sring); |
696 | info->ring_ref = GRANT_INVALID_REF; | 704 | info->ring_ref = GRANT_INVALID_REF; |
697 | info->ring.sring = NULL; | 705 | info->ring.sring = NULL; |
698 | } | 706 | } |
699 | if (info->irq) | 707 | if (info->irq) |
700 | unbind_from_irqhandler(info->irq, info); | 708 | unbind_from_irqhandler(info->irq, info); |
701 | info->evtchn = info->irq = 0; | 709 | info->evtchn = info->irq = 0; |
702 | 710 | ||
703 | } | 711 | } |
704 | 712 | ||
705 | static void blkif_completion(struct blk_shadow *s) | 713 | static void blkif_completion(struct blk_shadow *s) |
706 | { | 714 | { |
707 | int i; | 715 | int i; |
708 | for (i = 0; i < s->req.nr_segments; i++) | 716 | /* Do not use for BLKIF_OP_DISCARD: the discard flag shares |
717 | * storage with nr_segments. */ | ||
718 | for (i = 0; i < s->req.u.rw.nr_segments; i++) | ||
709 | gnttab_end_foreign_access(s->req.u.rw.seg[i].gref, 0, 0UL); | 719 | gnttab_end_foreign_access(s->req.u.rw.seg[i].gref, 0, 0UL); |
710 | } | 720 | } |
711 | 721 | ||
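blkif_completion() now carries that warning because, after the "squash blkif_request_rw and blkif_request_discard together" change, the discard fields overlay the read/write fields in one union — a discard has no grant references to end, and its flag byte sits where nr_segments lives. A simplified userspace sketch of the overlap (this layout is illustrative, not the exact blkif ABI):

#include <stdio.h>
#include <stddef.h>
#include <stdint.h>

struct blkif_request_sketch {
        uint8_t operation;
        union {
                struct { uint8_t nr_segments; uint64_t id; } rw;
                struct { uint8_t flag;        uint64_t id; } discard;
        } u;
};

int main(void)
{
        printf("nr_segments at %zu, discard flag at %zu\n",
               offsetof(struct blkif_request_sketch, u.rw.nr_segments),
               offsetof(struct blkif_request_sketch, u.discard.flag));
        /* Same offset: reading nr_segments on a discard would misread
         * the BLKIF_DISCARD_SECURE flag as a segment count. */
        return 0;
}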
712 | static irqreturn_t blkif_interrupt(int irq, void *dev_id) | 722 | static irqreturn_t blkif_interrupt(int irq, void *dev_id) |
713 | { | 723 | { |
714 | struct request *req; | 724 | struct request *req; |
715 | struct blkif_response *bret; | 725 | struct blkif_response *bret; |
716 | RING_IDX i, rp; | 726 | RING_IDX i, rp; |
717 | unsigned long flags; | 727 | unsigned long flags; |
718 | struct blkfront_info *info = (struct blkfront_info *)dev_id; | 728 | struct blkfront_info *info = (struct blkfront_info *)dev_id; |
719 | int error; | 729 | int error; |
720 | 730 | ||
721 | spin_lock_irqsave(&blkif_io_lock, flags); | 731 | spin_lock_irqsave(&blkif_io_lock, flags); |
722 | 732 | ||
723 | if (unlikely(info->connected != BLKIF_STATE_CONNECTED)) { | 733 | if (unlikely(info->connected != BLKIF_STATE_CONNECTED)) { |
724 | spin_unlock_irqrestore(&blkif_io_lock, flags); | 734 | spin_unlock_irqrestore(&blkif_io_lock, flags); |
725 | return IRQ_HANDLED; | 735 | return IRQ_HANDLED; |
726 | } | 736 | } |
727 | 737 | ||
728 | again: | 738 | again: |
729 | rp = info->ring.sring->rsp_prod; | 739 | rp = info->ring.sring->rsp_prod; |
730 | rmb(); /* Ensure we see queued responses up to 'rp'. */ | 740 | rmb(); /* Ensure we see queued responses up to 'rp'. */ |
731 | 741 | ||
732 | for (i = info->ring.rsp_cons; i != rp; i++) { | 742 | for (i = info->ring.rsp_cons; i != rp; i++) { |
733 | unsigned long id; | 743 | unsigned long id; |
734 | 744 | ||
735 | bret = RING_GET_RESPONSE(&info->ring, i); | 745 | bret = RING_GET_RESPONSE(&info->ring, i); |
736 | id = bret->id; | 746 | id = bret->id; |
737 | req = info->shadow[id].request; | 747 | req = info->shadow[id].request; |
738 | 748 | ||
739 | blkif_completion(&info->shadow[id]); | 749 | if (bret->operation != BLKIF_OP_DISCARD) |
750 | blkif_completion(&info->shadow[id]); | ||
740 | 751 | ||
741 | add_id_to_freelist(info, id); | 752 | add_id_to_freelist(info, id); |
742 | 753 | ||
743 | error = (bret->status == BLKIF_RSP_OKAY) ? 0 : -EIO; | 754 | error = (bret->status == BLKIF_RSP_OKAY) ? 0 : -EIO; |
744 | switch (bret->operation) { | 755 | switch (bret->operation) { |
745 | case BLKIF_OP_DISCARD: | 756 | case BLKIF_OP_DISCARD: |
746 | if (unlikely(bret->status == BLKIF_RSP_EOPNOTSUPP)) { | 757 | if (unlikely(bret->status == BLKIF_RSP_EOPNOTSUPP)) { |
747 | struct request_queue *rq = info->rq; | 758 | struct request_queue *rq = info->rq; |
748 | printk(KERN_WARNING "blkfront: %s: discard op failed\n", | 759 | printk(KERN_WARNING "blkfront: %s: discard op failed\n", |
749 | info->gd->disk_name); | 760 | info->gd->disk_name); |
750 | error = -EOPNOTSUPP; | 761 | error = -EOPNOTSUPP; |
751 | info->feature_discard = 0; | 762 | info->feature_discard = 0; |
763 | info->feature_secdiscard = 0; | ||
752 | queue_flag_clear(QUEUE_FLAG_DISCARD, rq); | 764 | queue_flag_clear(QUEUE_FLAG_DISCARD, rq); |
765 | queue_flag_clear(QUEUE_FLAG_SECDISCARD, rq); | ||
753 | } | 766 | } |
754 | __blk_end_request_all(req, error); | 767 | __blk_end_request_all(req, error); |
755 | break; | 768 | break; |
756 | case BLKIF_OP_FLUSH_DISKCACHE: | 769 | case BLKIF_OP_FLUSH_DISKCACHE: |
757 | case BLKIF_OP_WRITE_BARRIER: | 770 | case BLKIF_OP_WRITE_BARRIER: |
758 | if (unlikely(bret->status == BLKIF_RSP_EOPNOTSUPP)) { | 771 | if (unlikely(bret->status == BLKIF_RSP_EOPNOTSUPP)) { |
759 | printk(KERN_WARNING "blkfront: %s: write %s op failed\n", | 772 | printk(KERN_WARNING "blkfront: %s: write %s op failed\n", |
760 | info->flush_op == BLKIF_OP_WRITE_BARRIER ? | 773 | info->flush_op == BLKIF_OP_WRITE_BARRIER ? |
761 | "barrier" : "flush disk cache", | 774 | "barrier" : "flush disk cache", |
762 | info->gd->disk_name); | 775 | info->gd->disk_name); |
763 | error = -EOPNOTSUPP; | 776 | error = -EOPNOTSUPP; |
764 | } | 777 | } |
765 | if (unlikely(bret->status == BLKIF_RSP_ERROR && | 778 | if (unlikely(bret->status == BLKIF_RSP_ERROR && |
766 | info->shadow[id].req.nr_segments == 0)) { | 779 | info->shadow[id].req.u.rw.nr_segments == 0)) { |
767 | printk(KERN_WARNING "blkfront: %s: empty write %s op failed\n", | 780 | printk(KERN_WARNING "blkfront: %s: empty write %s op failed\n", |
768 | info->flush_op == BLKIF_OP_WRITE_BARRIER ? | 781 | info->flush_op == BLKIF_OP_WRITE_BARRIER ? |
769 | "barrier" : "flush disk cache", | 782 | "barrier" : "flush disk cache", |
770 | info->gd->disk_name); | 783 | info->gd->disk_name); |
771 | error = -EOPNOTSUPP; | 784 | error = -EOPNOTSUPP; |
772 | } | 785 | } |
773 | if (unlikely(error)) { | 786 | if (unlikely(error)) { |
774 | if (error == -EOPNOTSUPP) | 787 | if (error == -EOPNOTSUPP) |
775 | error = 0; | 788 | error = 0; |
776 | info->feature_flush = 0; | 789 | info->feature_flush = 0; |
777 | info->flush_op = 0; | 790 | info->flush_op = 0; |
778 | xlvbd_flush(info); | 791 | xlvbd_flush(info); |
779 | } | 792 | } |
780 | /* fall through */ | 793 | /* fall through */ |
781 | case BLKIF_OP_READ: | 794 | case BLKIF_OP_READ: |
782 | case BLKIF_OP_WRITE: | 795 | case BLKIF_OP_WRITE: |
783 | if (unlikely(bret->status != BLKIF_RSP_OKAY)) | 796 | if (unlikely(bret->status != BLKIF_RSP_OKAY)) |
784 | dev_dbg(&info->xbdev->dev, "Bad return from blkdev data " | 797 | dev_dbg(&info->xbdev->dev, "Bad return from blkdev data " |
785 | "request: %x\n", bret->status); | 798 | "request: %x\n", bret->status); |
786 | 799 | ||
787 | __blk_end_request_all(req, error); | 800 | __blk_end_request_all(req, error); |
788 | break; | 801 | break; |
789 | default: | 802 | default: |
790 | BUG(); | 803 | BUG(); |
791 | } | 804 | } |
792 | } | 805 | } |
793 | 806 | ||
794 | info->ring.rsp_cons = i; | 807 | info->ring.rsp_cons = i; |
795 | 808 | ||
796 | if (i != info->ring.req_prod_pvt) { | 809 | if (i != info->ring.req_prod_pvt) { |
797 | int more_to_do; | 810 | int more_to_do; |
798 | RING_FINAL_CHECK_FOR_RESPONSES(&info->ring, more_to_do); | 811 | RING_FINAL_CHECK_FOR_RESPONSES(&info->ring, more_to_do); |
799 | if (more_to_do) | 812 | if (more_to_do) |
800 | goto again; | 813 | goto again; |
801 | } else | 814 | } else |
802 | info->ring.sring->rsp_event = i + 1; | 815 | info->ring.sring->rsp_event = i + 1; |
803 | 816 | ||
804 | kick_pending_request_queues(info); | 817 | kick_pending_request_queues(info); |
805 | 818 | ||
806 | spin_unlock_irqrestore(&blkif_io_lock, flags); | 819 | spin_unlock_irqrestore(&blkif_io_lock, flags); |
807 | 820 | ||
808 | return IRQ_HANDLED; | 821 | return IRQ_HANDLED; |
809 | } | 822 | } |
810 | 823 | ||
811 | 824 | ||
812 | static int setup_blkring(struct xenbus_device *dev, | 825 | static int setup_blkring(struct xenbus_device *dev, |
813 | struct blkfront_info *info) | 826 | struct blkfront_info *info) |
814 | { | 827 | { |
815 | struct blkif_sring *sring; | 828 | struct blkif_sring *sring; |
816 | int err; | 829 | int err; |
817 | 830 | ||
818 | info->ring_ref = GRANT_INVALID_REF; | 831 | info->ring_ref = GRANT_INVALID_REF; |
819 | 832 | ||
820 | sring = (struct blkif_sring *)__get_free_page(GFP_NOIO | __GFP_HIGH); | 833 | sring = (struct blkif_sring *)__get_free_page(GFP_NOIO | __GFP_HIGH); |
821 | if (!sring) { | 834 | if (!sring) { |
822 | xenbus_dev_fatal(dev, -ENOMEM, "allocating shared ring"); | 835 | xenbus_dev_fatal(dev, -ENOMEM, "allocating shared ring"); |
823 | return -ENOMEM; | 836 | return -ENOMEM; |
824 | } | 837 | } |
825 | SHARED_RING_INIT(sring); | 838 | SHARED_RING_INIT(sring); |
826 | FRONT_RING_INIT(&info->ring, sring, PAGE_SIZE); | 839 | FRONT_RING_INIT(&info->ring, sring, PAGE_SIZE); |
827 | 840 | ||
828 | sg_init_table(info->sg, BLKIF_MAX_SEGMENTS_PER_REQUEST); | 841 | sg_init_table(info->sg, BLKIF_MAX_SEGMENTS_PER_REQUEST); |
829 | 842 | ||
830 | err = xenbus_grant_ring(dev, virt_to_mfn(info->ring.sring)); | 843 | err = xenbus_grant_ring(dev, virt_to_mfn(info->ring.sring)); |
831 | if (err < 0) { | 844 | if (err < 0) { |
832 | free_page((unsigned long)sring); | 845 | free_page((unsigned long)sring); |
833 | info->ring.sring = NULL; | 846 | info->ring.sring = NULL; |
834 | goto fail; | 847 | goto fail; |
835 | } | 848 | } |
836 | info->ring_ref = err; | 849 | info->ring_ref = err; |
837 | 850 | ||
838 | err = xenbus_alloc_evtchn(dev, &info->evtchn); | 851 | err = xenbus_alloc_evtchn(dev, &info->evtchn); |
839 | if (err) | 852 | if (err) |
840 | goto fail; | 853 | goto fail; |
841 | 854 | ||
842 | err = bind_evtchn_to_irqhandler(info->evtchn, | 855 | err = bind_evtchn_to_irqhandler(info->evtchn, |
843 | blkif_interrupt, | 856 | blkif_interrupt, |
844 | IRQF_SAMPLE_RANDOM, "blkif", info); | 857 | IRQF_SAMPLE_RANDOM, "blkif", info); |
845 | if (err <= 0) { | 858 | if (err <= 0) { |
846 | xenbus_dev_fatal(dev, err, | 859 | xenbus_dev_fatal(dev, err, |
847 | "bind_evtchn_to_irqhandler failed"); | 860 | "bind_evtchn_to_irqhandler failed"); |
848 | goto fail; | 861 | goto fail; |
849 | } | 862 | } |
850 | info->irq = err; | 863 | info->irq = err; |
851 | 864 | ||
852 | return 0; | 865 | return 0; |
853 | fail: | 866 | fail: |
854 | blkif_free(info, 0); | 867 | blkif_free(info, 0); |
855 | return err; | 868 | return err; |
856 | } | 869 | } |
857 | 870 | ||
858 | 871 | ||
859 | /* Common code used when first setting up, and when resuming. */ | 872 | /* Common code used when first setting up, and when resuming. */ |
860 | static int talk_to_blkback(struct xenbus_device *dev, | 873 | static int talk_to_blkback(struct xenbus_device *dev, |
861 | struct blkfront_info *info) | 874 | struct blkfront_info *info) |
862 | { | 875 | { |
863 | const char *message = NULL; | 876 | const char *message = NULL; |
864 | struct xenbus_transaction xbt; | 877 | struct xenbus_transaction xbt; |
865 | int err; | 878 | int err; |
866 | 879 | ||
867 | /* Create shared ring, alloc event channel. */ | 880 | /* Create shared ring, alloc event channel. */ |
868 | err = setup_blkring(dev, info); | 881 | err = setup_blkring(dev, info); |
869 | if (err) | 882 | if (err) |
870 | goto out; | 883 | goto out; |
871 | 884 | ||
872 | again: | 885 | again: |
873 | err = xenbus_transaction_start(&xbt); | 886 | err = xenbus_transaction_start(&xbt); |
874 | if (err) { | 887 | if (err) { |
875 | xenbus_dev_fatal(dev, err, "starting transaction"); | 888 | xenbus_dev_fatal(dev, err, "starting transaction"); |
876 | goto destroy_blkring; | 889 | goto destroy_blkring; |
877 | } | 890 | } |
878 | 891 | ||
879 | err = xenbus_printf(xbt, dev->nodename, | 892 | err = xenbus_printf(xbt, dev->nodename, |
880 | "ring-ref", "%u", info->ring_ref); | 893 | "ring-ref", "%u", info->ring_ref); |
881 | if (err) { | 894 | if (err) { |
882 | message = "writing ring-ref"; | 895 | message = "writing ring-ref"; |
883 | goto abort_transaction; | 896 | goto abort_transaction; |
884 | } | 897 | } |
885 | err = xenbus_printf(xbt, dev->nodename, | 898 | err = xenbus_printf(xbt, dev->nodename, |
886 | "event-channel", "%u", info->evtchn); | 899 | "event-channel", "%u", info->evtchn); |
887 | if (err) { | 900 | if (err) { |
888 | message = "writing event-channel"; | 901 | message = "writing event-channel"; |
889 | goto abort_transaction; | 902 | goto abort_transaction; |
890 | } | 903 | } |
891 | err = xenbus_printf(xbt, dev->nodename, "protocol", "%s", | 904 | err = xenbus_printf(xbt, dev->nodename, "protocol", "%s", |
892 | XEN_IO_PROTO_ABI_NATIVE); | 905 | XEN_IO_PROTO_ABI_NATIVE); |
893 | if (err) { | 906 | if (err) { |
894 | message = "writing protocol"; | 907 | message = "writing protocol"; |
895 | goto abort_transaction; | 908 | goto abort_transaction; |
896 | } | 909 | } |
897 | 910 | ||
898 | err = xenbus_transaction_end(xbt, 0); | 911 | err = xenbus_transaction_end(xbt, 0); |
899 | if (err) { | 912 | if (err) { |
900 | if (err == -EAGAIN) | 913 | if (err == -EAGAIN) |
901 | goto again; | 914 | goto again; |
902 | xenbus_dev_fatal(dev, err, "completing transaction"); | 915 | xenbus_dev_fatal(dev, err, "completing transaction"); |
903 | goto destroy_blkring; | 916 | goto destroy_blkring; |
904 | } | 917 | } |
905 | 918 | ||
906 | xenbus_switch_state(dev, XenbusStateInitialised); | 919 | xenbus_switch_state(dev, XenbusStateInitialised); |
907 | 920 | ||
908 | return 0; | 921 | return 0; |
909 | 922 | ||
910 | abort_transaction: | 923 | abort_transaction: |
911 | xenbus_transaction_end(xbt, 1); | 924 | xenbus_transaction_end(xbt, 1); |
912 | if (message) | 925 | if (message) |
913 | xenbus_dev_fatal(dev, err, "%s", message); | 926 | xenbus_dev_fatal(dev, err, "%s", message); |
914 | destroy_blkring: | 927 | destroy_blkring: |
915 | blkif_free(info, 0); | 928 | blkif_free(info, 0); |
916 | out: | 929 | out: |
917 | return err; | 930 | return err; |
918 | } | 931 | } |
919 | 932 | ||
920 | /** | 933 | /** |
921 | * Entry point to this code when a new device is created. Allocate the basic | 934 | * Entry point to this code when a new device is created. Allocate the basic |
922 | * structures and the ring buffer for communication with the backend, and | 935 | * structures and the ring buffer for communication with the backend, and |
923 | * inform the backend of the appropriate details for those. Switch to | 936 | * inform the backend of the appropriate details for those. Switch to |
924 | * Initialised state. | 937 | * Initialised state. |
925 | */ | 938 | */ |
926 | static int blkfront_probe(struct xenbus_device *dev, | 939 | static int blkfront_probe(struct xenbus_device *dev, |
927 | const struct xenbus_device_id *id) | 940 | const struct xenbus_device_id *id) |
928 | { | 941 | { |
929 | int err, vdevice, i; | 942 | int err, vdevice, i; |
930 | struct blkfront_info *info; | 943 | struct blkfront_info *info; |
931 | 944 | ||
932 | /* FIXME: Use dynamic device id if this is not set. */ | 945 | /* FIXME: Use dynamic device id if this is not set. */ |
933 | err = xenbus_scanf(XBT_NIL, dev->nodename, | 946 | err = xenbus_scanf(XBT_NIL, dev->nodename, |
934 | "virtual-device", "%i", &vdevice); | 947 | "virtual-device", "%i", &vdevice); |
935 | if (err != 1) { | 948 | if (err != 1) { |
936 | /* go looking in the extended area instead */ | 949 | /* go looking in the extended area instead */ |
937 | err = xenbus_scanf(XBT_NIL, dev->nodename, "virtual-device-ext", | 950 | err = xenbus_scanf(XBT_NIL, dev->nodename, "virtual-device-ext", |
938 | "%i", &vdevice); | 951 | "%i", &vdevice); |
939 | if (err != 1) { | 952 | if (err != 1) { |
940 | xenbus_dev_fatal(dev, err, "reading virtual-device"); | 953 | xenbus_dev_fatal(dev, err, "reading virtual-device"); |
941 | return err; | 954 | return err; |
942 | } | 955 | } |
943 | } | 956 | } |
944 | 957 | ||
945 | if (xen_hvm_domain()) { | 958 | if (xen_hvm_domain()) { |
946 | char *type; | 959 | char *type; |
947 | int len; | 960 | int len; |
948 | /* no unplug has been done: do not hook devices != xen vbds */ | 961 | /* no unplug has been done: do not hook devices != xen vbds */ |
949 | if (xen_platform_pci_unplug & XEN_UNPLUG_UNNECESSARY) { | 962 | if (xen_platform_pci_unplug & XEN_UNPLUG_UNNECESSARY) { |
950 | int major; | 963 | int major; |
951 | 964 | ||
952 | if (!VDEV_IS_EXTENDED(vdevice)) | 965 | if (!VDEV_IS_EXTENDED(vdevice)) |
953 | major = BLKIF_MAJOR(vdevice); | 966 | major = BLKIF_MAJOR(vdevice); |
954 | else | 967 | else |
955 | major = XENVBD_MAJOR; | 968 | major = XENVBD_MAJOR; |
956 | 969 | ||
957 | if (major != XENVBD_MAJOR) { | 970 | if (major != XENVBD_MAJOR) { |
958 | printk(KERN_INFO | 971 | printk(KERN_INFO |
959 | "%s: HVM does not support vbd %d as xen block device\n", | 972 | "%s: HVM does not support vbd %d as xen block device\n", |
960 | __FUNCTION__, vdevice); | 973 | __FUNCTION__, vdevice); |
961 | return -ENODEV; | 974 | return -ENODEV; |
962 | } | 975 | } |
963 | } | 976 | } |
964 | /* do not create a PV cdrom device if we are an HVM guest */ | 977 | /* do not create a PV cdrom device if we are an HVM guest */ |
965 | type = xenbus_read(XBT_NIL, dev->nodename, "device-type", &len); | 978 | type = xenbus_read(XBT_NIL, dev->nodename, "device-type", &len); |
966 | if (IS_ERR(type)) | 979 | if (IS_ERR(type)) |
967 | return -ENODEV; | 980 | return -ENODEV; |
968 | if (strncmp(type, "cdrom", 5) == 0) { | 981 | if (strncmp(type, "cdrom", 5) == 0) { |
969 | kfree(type); | 982 | kfree(type); |
970 | return -ENODEV; | 983 | return -ENODEV; |
971 | } | 984 | } |
972 | kfree(type); | 985 | kfree(type); |
973 | } | 986 | } |
974 | info = kzalloc(sizeof(*info), GFP_KERNEL); | 987 | info = kzalloc(sizeof(*info), GFP_KERNEL); |
975 | if (!info) { | 988 | if (!info) { |
976 | xenbus_dev_fatal(dev, -ENOMEM, "allocating info structure"); | 989 | xenbus_dev_fatal(dev, -ENOMEM, "allocating info structure"); |
977 | return -ENOMEM; | 990 | return -ENOMEM; |
978 | } | 991 | } |
979 | 992 | ||
980 | mutex_init(&info->mutex); | 993 | mutex_init(&info->mutex); |
981 | info->xbdev = dev; | 994 | info->xbdev = dev; |
982 | info->vdevice = vdevice; | 995 | info->vdevice = vdevice; |
983 | info->connected = BLKIF_STATE_DISCONNECTED; | 996 | info->connected = BLKIF_STATE_DISCONNECTED; |
984 | INIT_WORK(&info->work, blkif_restart_queue); | 997 | INIT_WORK(&info->work, blkif_restart_queue); |
985 | 998 | ||
986 | for (i = 0; i < BLK_RING_SIZE; i++) | 999 | for (i = 0; i < BLK_RING_SIZE; i++) |
987 | info->shadow[i].req.id = i+1; | 1000 | info->shadow[i].req.u.rw.id = i+1; |
988 | info->shadow[BLK_RING_SIZE-1].req.id = 0x0fffffff; | 1001 | info->shadow[BLK_RING_SIZE-1].req.u.rw.id = 0x0fffffff; |
989 | 1002 | ||
990 | /* Front end dir is a number, which is used as the id. */ | 1003 | /* Front end dir is a number, which is used as the id. */ |
991 | info->handle = simple_strtoul(strrchr(dev->nodename, '/')+1, NULL, 0); | 1004 | info->handle = simple_strtoul(strrchr(dev->nodename, '/')+1, NULL, 0); |
992 | dev_set_drvdata(&dev->dev, info); | 1005 | dev_set_drvdata(&dev->dev, info); |
993 | 1006 | ||
994 | err = talk_to_blkback(dev, info); | 1007 | err = talk_to_blkback(dev, info); |
995 | if (err) { | 1008 | if (err) { |
996 | kfree(info); | 1009 | kfree(info); |
997 | dev_set_drvdata(&dev->dev, NULL); | 1010 | dev_set_drvdata(&dev->dev, NULL); |
998 | return err; | 1011 | return err; |
999 | } | 1012 | } |
1000 | 1013 | ||
1001 | return 0; | 1014 | return 0; |
1002 | } | 1015 | } |
1003 | 1016 | ||
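blkfront_probe() threads the shadow slots into a free list: each unused slot's id field stores the index of the next free slot, and the last slot holds the 0x0fffffff sentinel. A userspace sketch of how the get_id_from_freelist()/add_id_to_freelist() helpers (paraphrased here, not copied from the driver) walk that chain:

#include <stdio.h>

#define BLK_RING_SIZE 32        /* illustrative ring size */

static unsigned long ids[BLK_RING_SIZE];        /* stands in for shadow[i].req.u.rw.id */
static unsigned long shadow_free;

static unsigned long get_id(void)
{
        unsigned long id = shadow_free;
        shadow_free = ids[id];          /* follow the chain to the next free slot */
        return id;
}

static void put_id(unsigned long id)
{
        ids[id] = shadow_free;          /* push the slot back on the chain */
        shadow_free = id;
}

int main(void)
{
        unsigned long a, b;
        int i;

        for (i = 0; i < BLK_RING_SIZE; i++)
                ids[i] = i + 1;
        ids[BLK_RING_SIZE - 1] = 0x0fffffff;
        shadow_free = 0;

        a = get_id();
        b = get_id();
        printf("got %lu then %lu\n", a, b);     /* 0 then 1 */
        put_id(a);
        printf("reused %lu\n", get_id());       /* 0 again */
        return 0;
}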
1004 | 1017 | ||
1005 | static int blkif_recover(struct blkfront_info *info) | 1018 | static int blkif_recover(struct blkfront_info *info) |
1006 | { | 1019 | { |
1007 | int i; | 1020 | int i; |
1008 | struct blkif_request *req; | 1021 | struct blkif_request *req; |
1009 | struct blk_shadow *copy; | 1022 | struct blk_shadow *copy; |
1010 | int j; | 1023 | int j; |
1011 | 1024 | ||
1012 | /* Stage 1: Make a safe copy of the shadow state. */ | 1025 | /* Stage 1: Make a safe copy of the shadow state. */ |
1013 | copy = kmalloc(sizeof(info->shadow), | 1026 | copy = kmalloc(sizeof(info->shadow), |
1014 | GFP_NOIO | __GFP_REPEAT | __GFP_HIGH); | 1027 | GFP_NOIO | __GFP_REPEAT | __GFP_HIGH); |
1015 | if (!copy) | 1028 | if (!copy) |
1016 | return -ENOMEM; | 1029 | return -ENOMEM; |
1017 | memcpy(copy, info->shadow, sizeof(info->shadow)); | 1030 | memcpy(copy, info->shadow, sizeof(info->shadow)); |
1018 | 1031 | ||
1019 | /* Stage 2: Set up free list. */ | 1032 | /* Stage 2: Set up free list. */ |
1020 | memset(&info->shadow, 0, sizeof(info->shadow)); | 1033 | memset(&info->shadow, 0, sizeof(info->shadow)); |
1021 | for (i = 0; i < BLK_RING_SIZE; i++) | 1034 | for (i = 0; i < BLK_RING_SIZE; i++) |
1022 | info->shadow[i].req.id = i+1; | 1035 | info->shadow[i].req.u.rw.id = i+1; |
1023 | info->shadow_free = info->ring.req_prod_pvt; | 1036 | info->shadow_free = info->ring.req_prod_pvt; |
1024 | info->shadow[BLK_RING_SIZE-1].req.id = 0x0fffffff; | 1037 | info->shadow[BLK_RING_SIZE-1].req.u.rw.id = 0x0fffffff; |
1025 | 1038 | ||
1026 | /* Stage 3: Find pending requests and requeue them. */ | 1039 | /* Stage 3: Find pending requests and requeue them. */ |
1027 | for (i = 0; i < BLK_RING_SIZE; i++) { | 1040 | for (i = 0; i < BLK_RING_SIZE; i++) { |
1028 | /* Not in use? */ | 1041 | /* Not in use? */ |
1029 | if (!copy[i].request) | 1042 | if (!copy[i].request) |
1030 | continue; | 1043 | continue; |
1031 | 1044 | ||
1032 | /* Grab a request slot and copy shadow state into it. */ | 1045 | /* Grab a request slot and copy shadow state into it. */ |
1033 | req = RING_GET_REQUEST(&info->ring, info->ring.req_prod_pvt); | 1046 | req = RING_GET_REQUEST(&info->ring, info->ring.req_prod_pvt); |
1034 | *req = copy[i].req; | 1047 | *req = copy[i].req; |
1035 | 1048 | ||
1036 | /* We get a new request id, and must reset the shadow state. */ | 1049 | /* We get a new request id, and must reset the shadow state. */ |
1037 | req->id = get_id_from_freelist(info); | 1050 | req->u.rw.id = get_id_from_freelist(info); |
1038 | memcpy(&info->shadow[req->id], &copy[i], sizeof(copy[i])); | 1051 | memcpy(&info->shadow[req->u.rw.id], &copy[i], sizeof(copy[i])); |
1039 | 1052 | ||
1053 | if (req->operation != BLKIF_OP_DISCARD) { | ||
1040 | /* Rewrite any grant references invalidated by susp/resume. */ | 1054 | /* Rewrite any grant references invalidated by susp/resume. */ |
1041 | for (j = 0; j < req->nr_segments; j++) | 1055 | for (j = 0; j < req->u.rw.nr_segments; j++) |
1042 | gnttab_grant_foreign_access_ref( | 1056 | gnttab_grant_foreign_access_ref( |
1043 | req->u.rw.seg[j].gref, | 1057 | req->u.rw.seg[j].gref, |
1044 | info->xbdev->otherend_id, | 1058 | info->xbdev->otherend_id, |
1045 | pfn_to_mfn(info->shadow[req->id].frame[j]), | 1059 | pfn_to_mfn(info->shadow[req->u.rw.id].frame[j]), |
1046 | rq_data_dir(info->shadow[req->id].request)); | 1060 | rq_data_dir(info->shadow[req->u.rw.id].request)); |
1047 | info->shadow[req->id].req = *req; | 1061 | } |
1062 | info->shadow[req->u.rw.id].req = *req; | ||
1048 | 1063 | ||
1049 | info->ring.req_prod_pvt++; | 1064 | info->ring.req_prod_pvt++; |
1050 | } | 1065 | } |
1051 | 1066 | ||
1052 | kfree(copy); | 1067 | kfree(copy); |
1053 | 1068 | ||
1054 | xenbus_switch_state(info->xbdev, XenbusStateConnected); | 1069 | xenbus_switch_state(info->xbdev, XenbusStateConnected); |
1055 | 1070 | ||
1056 | spin_lock_irq(&blkif_io_lock); | 1071 | spin_lock_irq(&blkif_io_lock); |
1057 | 1072 | ||
1058 | /* Now safe for us to use the shared ring */ | 1073 | /* Now safe for us to use the shared ring */ |
1059 | info->connected = BLKIF_STATE_CONNECTED; | 1074 | info->connected = BLKIF_STATE_CONNECTED; |
1060 | 1075 | ||
1061 | /* Send off requeued requests */ | 1076 | /* Send off requeued requests */ |
1062 | flush_requests(info); | 1077 | flush_requests(info); |
1063 | 1078 | ||
1064 | /* Kick any other new requests queued since we resumed */ | 1079 | /* Kick any other new requests queued since we resumed */ |
1065 | kick_pending_request_queues(info); | 1080 | kick_pending_request_queues(info); |
1066 | 1081 | ||
1067 | spin_unlock_irq(&blkif_io_lock); | 1082 | spin_unlock_irq(&blkif_io_lock); |
1068 | 1083 | ||
1069 | return 0; | 1084 | return 0; |
1070 | } | 1085 | } |
1071 | 1086 | ||
1072 | /** | 1087 | /** |
1073 | * We are reconnecting to the backend, due to a suspend/resume, or a backend | 1088 | * We are reconnecting to the backend, due to a suspend/resume, or a backend |
1074 | * driver restart. We tear down our blkif structure and recreate it, but | 1089 | * driver restart. We tear down our blkif structure and recreate it, but |
1075 | * leave the device-layer structures intact so that this is transparent to the | 1090 | * leave the device-layer structures intact so that this is transparent to the |
1076 | * rest of the kernel. | 1091 | * rest of the kernel. |
1077 | */ | 1092 | */ |
1078 | static int blkfront_resume(struct xenbus_device *dev) | 1093 | static int blkfront_resume(struct xenbus_device *dev) |
1079 | { | 1094 | { |
1080 | struct blkfront_info *info = dev_get_drvdata(&dev->dev); | 1095 | struct blkfront_info *info = dev_get_drvdata(&dev->dev); |
1081 | int err; | 1096 | int err; |
1082 | 1097 | ||
1083 | dev_dbg(&dev->dev, "blkfront_resume: %s\n", dev->nodename); | 1098 | dev_dbg(&dev->dev, "blkfront_resume: %s\n", dev->nodename); |
1084 | 1099 | ||
1085 | blkif_free(info, info->connected == BLKIF_STATE_CONNECTED); | 1100 | blkif_free(info, info->connected == BLKIF_STATE_CONNECTED); |
1086 | 1101 | ||
1087 | err = talk_to_blkback(dev, info); | 1102 | err = talk_to_blkback(dev, info); |
1088 | if (info->connected == BLKIF_STATE_SUSPENDED && !err) | 1103 | if (info->connected == BLKIF_STATE_SUSPENDED && !err) |
1089 | err = blkif_recover(info); | 1104 | err = blkif_recover(info); |
1090 | 1105 | ||
1091 | return err; | 1106 | return err; |
1092 | } | 1107 | } |
1093 | 1108 | ||
1094 | static void | 1109 | static void |
1095 | blkfront_closing(struct blkfront_info *info) | 1110 | blkfront_closing(struct blkfront_info *info) |
1096 | { | 1111 | { |
1097 | struct xenbus_device *xbdev = info->xbdev; | 1112 | struct xenbus_device *xbdev = info->xbdev; |
1098 | struct block_device *bdev = NULL; | 1113 | struct block_device *bdev = NULL; |
1099 | 1114 | ||
1100 | mutex_lock(&info->mutex); | 1115 | mutex_lock(&info->mutex); |
1101 | 1116 | ||
1102 | if (xbdev->state == XenbusStateClosing) { | 1117 | if (xbdev->state == XenbusStateClosing) { |
1103 | mutex_unlock(&info->mutex); | 1118 | mutex_unlock(&info->mutex); |
1104 | return; | 1119 | return; |
1105 | } | 1120 | } |
1106 | 1121 | ||
1107 | if (info->gd) | 1122 | if (info->gd) |
1108 | bdev = bdget_disk(info->gd, 0); | 1123 | bdev = bdget_disk(info->gd, 0); |
1109 | 1124 | ||
1110 | mutex_unlock(&info->mutex); | 1125 | mutex_unlock(&info->mutex); |
1111 | 1126 | ||
1112 | if (!bdev) { | 1127 | if (!bdev) { |
1113 | xenbus_frontend_closed(xbdev); | 1128 | xenbus_frontend_closed(xbdev); |
1114 | return; | 1129 | return; |
1115 | } | 1130 | } |
1116 | 1131 | ||
1117 | mutex_lock(&bdev->bd_mutex); | 1132 | mutex_lock(&bdev->bd_mutex); |
1118 | 1133 | ||
1119 | if (bdev->bd_openers) { | 1134 | if (bdev->bd_openers) { |
1120 | xenbus_dev_error(xbdev, -EBUSY, | 1135 | xenbus_dev_error(xbdev, -EBUSY, |
1121 | "Device in use; refusing to close"); | 1136 | "Device in use; refusing to close"); |
1122 | xenbus_switch_state(xbdev, XenbusStateClosing); | 1137 | xenbus_switch_state(xbdev, XenbusStateClosing); |
1123 | } else { | 1138 | } else { |
1124 | xlvbd_release_gendisk(info); | 1139 | xlvbd_release_gendisk(info); |
1125 | xenbus_frontend_closed(xbdev); | 1140 | xenbus_frontend_closed(xbdev); |
1126 | } | 1141 | } |
1127 | 1142 | ||
1128 | mutex_unlock(&bdev->bd_mutex); | 1143 | mutex_unlock(&bdev->bd_mutex); |
1129 | bdput(bdev); | 1144 | bdput(bdev); |
1130 | } | 1145 | } |
1131 | 1146 | ||
1132 | static void blkfront_setup_discard(struct blkfront_info *info) | 1147 | static void blkfront_setup_discard(struct blkfront_info *info) |
1133 | { | 1148 | { |
1134 | int err; | 1149 | int err; |
1135 | char *type; | 1150 | char *type; |
1136 | unsigned int discard_granularity; | 1151 | unsigned int discard_granularity; |
1137 | unsigned int discard_alignment; | 1152 | unsigned int discard_alignment; |
1153 | unsigned int discard_secure; | ||
1138 | 1154 | ||
1139 | type = xenbus_read(XBT_NIL, info->xbdev->otherend, "type", NULL); | 1155 | type = xenbus_read(XBT_NIL, info->xbdev->otherend, "type", NULL); |
1140 | if (IS_ERR(type)) | 1156 | if (IS_ERR(type)) |
1141 | return; | 1157 | return; |
1142 | 1158 | ||
1159 | info->feature_secdiscard = 0; | ||
1143 | if (strncmp(type, "phy", 3) == 0) { | 1160 | if (strncmp(type, "phy", 3) == 0) { |
1144 | err = xenbus_gather(XBT_NIL, info->xbdev->otherend, | 1161 | err = xenbus_gather(XBT_NIL, info->xbdev->otherend, |
1145 | "discard-granularity", "%u", &discard_granularity, | 1162 | "discard-granularity", "%u", &discard_granularity, |
1146 | "discard-alignment", "%u", &discard_alignment, | 1163 | "discard-alignment", "%u", &discard_alignment, |
1147 | NULL); | 1164 | NULL); |
1148 | if (!err) { | 1165 | if (!err) { |
1149 | info->feature_discard = 1; | 1166 | info->feature_discard = 1; |
1150 | info->discard_granularity = discard_granularity; | 1167 | info->discard_granularity = discard_granularity; |
1151 | info->discard_alignment = discard_alignment; | 1168 | info->discard_alignment = discard_alignment; |
1152 | } | 1169 | } |
1170 | err = xenbus_gather(XBT_NIL, info->xbdev->otherend, | ||
1171 | "discard-secure", "%d", &discard_secure, | ||
1172 | NULL); | ||
1173 | if (!err) | ||
1174 | info->feature_secdiscard = discard_secure; | ||
1175 | |||
1153 | } else if (strncmp(type, "file", 4) == 0) | 1176 | } else if (strncmp(type, "file", 4) == 0) |
1154 | info->feature_discard = 1; | 1177 | info->feature_discard = 1; |
1155 | 1178 | ||
1156 | kfree(type); | 1179 | kfree(type); |
1157 | } | 1180 | } |
1158 | 1181 | ||
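The parameters gathered here are only cached in blkfront_info; they take effect when the disk's request queue is configured. A minimal sketch of that step, modelled on this driver's xlvbd_init_blk_queue() (rq and gd are assumed to be the queue and gendisk set up by xlvbd_alloc_gendisk()):

	/* Sketch: apply the negotiated discard limits to the request queue. */
	if (info->feature_discard) {
		queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, rq);
		blk_queue_max_discard_sectors(rq, get_capacity(gd));
		rq->limits.discard_granularity = info->discard_granularity;
		rq->limits.discard_alignment = info->discard_alignment;
		if (info->feature_secdiscard)
			queue_flag_set_unlocked(QUEUE_FLAG_SECDISCARD, rq);
	}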
1159 | /* | 1182 | /* |
1160 | * Invoked when the backend is finally 'ready' (and has produced | 1183 | * Invoked when the backend is finally 'ready' (and has produced |
1161 | * the details about the physical device - #sectors, size, etc). | 1184 | * the details about the physical device - #sectors, size, etc). |
1162 | */ | 1185 | */ |
1163 | static void blkfront_connect(struct blkfront_info *info) | 1186 | static void blkfront_connect(struct blkfront_info *info) |
1164 | { | 1187 | { |
1165 | unsigned long long sectors; | 1188 | unsigned long long sectors; |
1166 | unsigned long sector_size; | 1189 | unsigned long sector_size; |
1167 | unsigned int binfo; | 1190 | unsigned int binfo; |
1168 | int err; | 1191 | int err; |
1169 | int barrier, flush, discard; | 1192 | int barrier, flush, discard; |
1170 | 1193 | ||
1171 | switch (info->connected) { | 1194 | switch (info->connected) { |
1172 | case BLKIF_STATE_CONNECTED: | 1195 | case BLKIF_STATE_CONNECTED: |
1173 | /* | 1196 | /* |
1174 | * Potentially, the back-end may be signalling | 1197 | * Potentially, the back-end may be signalling |
1175 | * a capacity change; update the capacity. | 1198 | * a capacity change; update the capacity. |
1176 | */ | 1199 | */ |
1177 | err = xenbus_scanf(XBT_NIL, info->xbdev->otherend, | 1200 | err = xenbus_scanf(XBT_NIL, info->xbdev->otherend, |
1178 | "sectors", "%Lu", §ors); | 1201 | "sectors", "%Lu", §ors); |
1179 | if (XENBUS_EXIST_ERR(err)) | 1202 | if (XENBUS_EXIST_ERR(err)) |
1180 | return; | 1203 | return; |
1181 | printk(KERN_INFO "Setting capacity to %Lu\n", | 1204 | printk(KERN_INFO "Setting capacity to %Lu\n", |
1182 | sectors); | 1205 | sectors); |
1183 | set_capacity(info->gd, sectors); | 1206 | set_capacity(info->gd, sectors); |
1184 | revalidate_disk(info->gd); | 1207 | revalidate_disk(info->gd); |
1185 | 1208 | ||
1186 | /* fall through */ | 1209 | /* fall through */ |
1187 | case BLKIF_STATE_SUSPENDED: | 1210 | case BLKIF_STATE_SUSPENDED: |
1188 | return; | 1211 | return; |
1189 | 1212 | ||
1190 | default: | 1213 | default: |
1191 | break; | 1214 | break; |
1192 | } | 1215 | } |
1193 | 1216 | ||
1194 | dev_dbg(&info->xbdev->dev, "%s:%s.\n", | 1217 | dev_dbg(&info->xbdev->dev, "%s:%s.\n", |
1195 | __func__, info->xbdev->otherend); | 1218 | __func__, info->xbdev->otherend); |
1196 | 1219 | ||
1197 | err = xenbus_gather(XBT_NIL, info->xbdev->otherend, | 1220 | err = xenbus_gather(XBT_NIL, info->xbdev->otherend, |
1198 | "sectors", "%llu", §ors, | 1221 | "sectors", "%llu", §ors, |
1199 | "info", "%u", &binfo, | 1222 | "info", "%u", &binfo, |
1200 | "sector-size", "%lu", §or_size, | 1223 | "sector-size", "%lu", §or_size, |
1201 | NULL); | 1224 | NULL); |
1202 | if (err) { | 1225 | if (err) { |
1203 | xenbus_dev_fatal(info->xbdev, err, | 1226 | xenbus_dev_fatal(info->xbdev, err, |
1204 | "reading backend fields at %s", | 1227 | "reading backend fields at %s", |
1205 | info->xbdev->otherend); | 1228 | info->xbdev->otherend); |
1206 | return; | 1229 | return; |
1207 | } | 1230 | } |
1208 | 1231 | ||
1209 | info->feature_flush = 0; | 1232 | info->feature_flush = 0; |
1210 | info->flush_op = 0; | 1233 | info->flush_op = 0; |
1211 | 1234 | ||
1212 | err = xenbus_gather(XBT_NIL, info->xbdev->otherend, | 1235 | err = xenbus_gather(XBT_NIL, info->xbdev->otherend, |
1213 | "feature-barrier", "%d", &barrier, | 1236 | "feature-barrier", "%d", &barrier, |
1214 | NULL); | 1237 | NULL); |
1215 | 1238 | ||
1216 | /* | 1239 | /* |
1217 | * If there's no "feature-barrier" defined, then it means | 1240 | * If there's no "feature-barrier" defined, then it means |
1218 | * we're dealing with a very old backend which writes | 1241 | * we're dealing with a very old backend which writes |
1219 | * synchronously; nothing to do. | 1242 | * synchronously; nothing to do. |
1220 | * | 1243 | * |
1221 | * If there are barriers, then we use flush. | 1244 | * If there are barriers, then we use flush. |
1222 | */ | 1245 | */ |
1223 | if (!err && barrier) { | 1246 | if (!err && barrier) { |
1224 | info->feature_flush = REQ_FLUSH | REQ_FUA; | 1247 | info->feature_flush = REQ_FLUSH | REQ_FUA; |
1225 | info->flush_op = BLKIF_OP_WRITE_BARRIER; | 1248 | info->flush_op = BLKIF_OP_WRITE_BARRIER; |
1226 | } | 1249 | } |
1227 | /* | 1250 | /* |
1228 | * And if there is "feature-flush-cache", use that in preference to | 1251 | * And if there is "feature-flush-cache", use that in preference to |
1229 | * barriers. | 1252 | * barriers. |
1230 | */ | 1253 | */ |
1231 | err = xenbus_gather(XBT_NIL, info->xbdev->otherend, | 1254 | err = xenbus_gather(XBT_NIL, info->xbdev->otherend, |
1232 | "feature-flush-cache", "%d", &flush, | 1255 | "feature-flush-cache", "%d", &flush, |
1233 | NULL); | 1256 | NULL); |
1234 | 1257 | ||
1235 | if (!err && flush) { | 1258 | if (!err && flush) { |
1236 | info->feature_flush = REQ_FLUSH; | 1259 | info->feature_flush = REQ_FLUSH; |
1237 | info->flush_op = BLKIF_OP_FLUSH_DISKCACHE; | 1260 | info->flush_op = BLKIF_OP_FLUSH_DISKCACHE; |
1238 | } | 1261 | } |
1239 | 1262 | ||
1240 | err = xenbus_gather(XBT_NIL, info->xbdev->otherend, | 1263 | err = xenbus_gather(XBT_NIL, info->xbdev->otherend, |
1241 | "feature-discard", "%d", &discard, | 1264 | "feature-discard", "%d", &discard, |
1242 | NULL); | 1265 | NULL); |
1243 | 1266 | ||
1244 | if (!err && discard) | 1267 | if (!err && discard) |
1245 | blkfront_setup_discard(info); | 1268 | blkfront_setup_discard(info); |
1246 | 1269 | ||
1247 | err = xlvbd_alloc_gendisk(sectors, info, binfo, sector_size); | 1270 | err = xlvbd_alloc_gendisk(sectors, info, binfo, sector_size); |
1248 | if (err) { | 1271 | if (err) { |
1249 | xenbus_dev_fatal(info->xbdev, err, "xlvbd_add at %s", | 1272 | xenbus_dev_fatal(info->xbdev, err, "xlvbd_add at %s", |
1250 | info->xbdev->otherend); | 1273 | info->xbdev->otherend); |
1251 | return; | 1274 | return; |
1252 | } | 1275 | } |
1253 | 1276 | ||
1254 | xenbus_switch_state(info->xbdev, XenbusStateConnected); | 1277 | xenbus_switch_state(info->xbdev, XenbusStateConnected); |
1255 | 1278 | ||
1256 | /* Kick pending requests. */ | 1279 | /* Kick pending requests. */ |
1257 | spin_lock_irq(&blkif_io_lock); | 1280 | spin_lock_irq(&blkif_io_lock); |
1258 | info->connected = BLKIF_STATE_CONNECTED; | 1281 | info->connected = BLKIF_STATE_CONNECTED; |
1259 | kick_pending_request_queues(info); | 1282 | kick_pending_request_queues(info); |
1260 | spin_unlock_irq(&blkif_io_lock); | 1283 | spin_unlock_irq(&blkif_io_lock); |
1261 | 1284 | ||
1262 | add_disk(info->gd); | 1285 | add_disk(info->gd); |
1263 | 1286 | ||
1264 | info->is_ready = 1; | 1287 | info->is_ready = 1; |
1265 | } | 1288 | } |
1266 | 1289 | ||
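For reference, the nodes read above are written by the backend during its setup. A hedged sketch of the publishing side, modelled on xen-blkback's xenbus code (xbt, dev, q and blkif are the backend's own state; error handling trimmed):

	/* Sketch: backend advertising discard support in xenstore. */
	xenbus_printf(xbt, dev->nodename, "feature-discard", "%d", 1);
	xenbus_printf(xbt, dev->nodename, "discard-granularity", "%u",
		      q->limits.discard_granularity);
	xenbus_printf(xbt, dev->nodename, "discard-alignment", "%u",
		      q->limits.discard_alignment);
	xenbus_printf(xbt, dev->nodename, "discard-secure", "%d",
		      blkif->vbd.discard_secure);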
1267 | /** | 1290 | /** |
1268 | * Callback received when the backend's state changes. | 1291 | * Callback received when the backend's state changes. |
1269 | */ | 1292 | */ |
1270 | static void blkback_changed(struct xenbus_device *dev, | 1293 | static void blkback_changed(struct xenbus_device *dev, |
1271 | enum xenbus_state backend_state) | 1294 | enum xenbus_state backend_state) |
1272 | { | 1295 | { |
1273 | struct blkfront_info *info = dev_get_drvdata(&dev->dev); | 1296 | struct blkfront_info *info = dev_get_drvdata(&dev->dev); |
1274 | 1297 | ||
1275 | dev_dbg(&dev->dev, "blkfront:blkback_changed to state %d.\n", backend_state); | 1298 | dev_dbg(&dev->dev, "blkfront:blkback_changed to state %d.\n", backend_state); |
1276 | 1299 | ||
1277 | switch (backend_state) { | 1300 | switch (backend_state) { |
1278 | case XenbusStateInitialising: | 1301 | case XenbusStateInitialising: |
1279 | case XenbusStateInitWait: | 1302 | case XenbusStateInitWait: |
1280 | case XenbusStateInitialised: | 1303 | case XenbusStateInitialised: |
1281 | case XenbusStateReconfiguring: | 1304 | case XenbusStateReconfiguring: |
1282 | case XenbusStateReconfigured: | 1305 | case XenbusStateReconfigured: |
1283 | case XenbusStateUnknown: | 1306 | case XenbusStateUnknown: |
1284 | case XenbusStateClosed: | 1307 | case XenbusStateClosed: |
1285 | break; | 1308 | break; |
1286 | 1309 | ||
1287 | case XenbusStateConnected: | 1310 | case XenbusStateConnected: |
1288 | blkfront_connect(info); | 1311 | blkfront_connect(info); |
1289 | break; | 1312 | break; |
1290 | 1313 | ||
1291 | case XenbusStateClosing: | 1314 | case XenbusStateClosing: |
1292 | blkfront_closing(info); | 1315 | blkfront_closing(info); |
1293 | break; | 1316 | break; |
1294 | } | 1317 | } |
1295 | } | 1318 | } |
1296 | 1319 | ||
1297 | static int blkfront_remove(struct xenbus_device *xbdev) | 1320 | static int blkfront_remove(struct xenbus_device *xbdev) |
1298 | { | 1321 | { |
1299 | struct blkfront_info *info = dev_get_drvdata(&xbdev->dev); | 1322 | struct blkfront_info *info = dev_get_drvdata(&xbdev->dev); |
1300 | struct block_device *bdev = NULL; | 1323 | struct block_device *bdev = NULL; |
1301 | struct gendisk *disk; | 1324 | struct gendisk *disk; |
1302 | 1325 | ||
1303 | dev_dbg(&xbdev->dev, "%s removed", xbdev->nodename); | 1326 | dev_dbg(&xbdev->dev, "%s removed", xbdev->nodename); |
1304 | 1327 | ||
1305 | blkif_free(info, 0); | 1328 | blkif_free(info, 0); |
1306 | 1329 | ||
1307 | mutex_lock(&info->mutex); | 1330 | mutex_lock(&info->mutex); |
1308 | 1331 | ||
1309 | disk = info->gd; | 1332 | disk = info->gd; |
1310 | if (disk) | 1333 | if (disk) |
1311 | bdev = bdget_disk(disk, 0); | 1334 | bdev = bdget_disk(disk, 0); |
1312 | 1335 | ||
1313 | info->xbdev = NULL; | 1336 | info->xbdev = NULL; |
1314 | mutex_unlock(&info->mutex); | 1337 | mutex_unlock(&info->mutex); |
1315 | 1338 | ||
1316 | if (!bdev) { | 1339 | if (!bdev) { |
1317 | kfree(info); | 1340 | kfree(info); |
1318 | return 0; | 1341 | return 0; |
1319 | } | 1342 | } |
1320 | 1343 | ||
1321 | /* | 1344 | /* |
1322 | * The xbdev was removed before we reached the Closed | 1345 | * The xbdev was removed before we reached the Closed |
1323 | * state. See if it's safe to remove the disk. If the bdev | 1346 | * state. See if it's safe to remove the disk. If the bdev |
1324 | * isn't closed yet, we let release take care of it. | 1347 | * isn't closed yet, we let release take care of it. |
1325 | */ | 1348 | */ |
1326 | 1349 | ||
1327 | mutex_lock(&bdev->bd_mutex); | 1350 | mutex_lock(&bdev->bd_mutex); |
1328 | info = disk->private_data; | 1351 | info = disk->private_data; |
1329 | 1352 | ||
1330 | dev_warn(disk_to_dev(disk), | 1353 | dev_warn(disk_to_dev(disk), |
1331 | "%s was hot-unplugged, %d stale handles\n", | 1354 | "%s was hot-unplugged, %d stale handles\n", |
1332 | xbdev->nodename, bdev->bd_openers); | 1355 | xbdev->nodename, bdev->bd_openers); |
1333 | 1356 | ||
1334 | if (info && !bdev->bd_openers) { | 1357 | if (info && !bdev->bd_openers) { |
1335 | xlvbd_release_gendisk(info); | 1358 | xlvbd_release_gendisk(info); |
1336 | disk->private_data = NULL; | 1359 | disk->private_data = NULL; |
1337 | kfree(info); | 1360 | kfree(info); |
1338 | } | 1361 | } |
1339 | 1362 | ||
1340 | mutex_unlock(&bdev->bd_mutex); | 1363 | mutex_unlock(&bdev->bd_mutex); |
1341 | bdput(bdev); | 1364 | bdput(bdev); |
1342 | 1365 | ||
1343 | return 0; | 1366 | return 0; |
1344 | } | 1367 | } |
1345 | 1368 | ||
1346 | static int blkfront_is_ready(struct xenbus_device *dev) | 1369 | static int blkfront_is_ready(struct xenbus_device *dev) |
1347 | { | 1370 | { |
1348 | struct blkfront_info *info = dev_get_drvdata(&dev->dev); | 1371 | struct blkfront_info *info = dev_get_drvdata(&dev->dev); |
1349 | 1372 | ||
1350 | return info->is_ready && info->xbdev; | 1373 | return info->is_ready && info->xbdev; |
1351 | } | 1374 | } |
1352 | 1375 | ||
1353 | static int blkif_open(struct block_device *bdev, fmode_t mode) | 1376 | static int blkif_open(struct block_device *bdev, fmode_t mode) |
1354 | { | 1377 | { |
1355 | struct gendisk *disk = bdev->bd_disk; | 1378 | struct gendisk *disk = bdev->bd_disk; |
1356 | struct blkfront_info *info; | 1379 | struct blkfront_info *info; |
1357 | int err = 0; | 1380 | int err = 0; |
1358 | 1381 | ||
1359 | mutex_lock(&blkfront_mutex); | 1382 | mutex_lock(&blkfront_mutex); |
1360 | 1383 | ||
1361 | info = disk->private_data; | 1384 | info = disk->private_data; |
1362 | if (!info) { | 1385 | if (!info) { |
1363 | /* xbdev gone */ | 1386 | /* xbdev gone */ |
1364 | err = -ERESTARTSYS; | 1387 | err = -ERESTARTSYS; |
1365 | goto out; | 1388 | goto out; |
1366 | } | 1389 | } |
1367 | 1390 | ||
1368 | mutex_lock(&info->mutex); | 1391 | mutex_lock(&info->mutex); |
1369 | 1392 | ||
1370 | if (!info->gd) | 1393 | if (!info->gd) |
1371 | /* xbdev is closed */ | 1394 | /* xbdev is closed */ |
1372 | err = -ERESTARTSYS; | 1395 | err = -ERESTARTSYS; |
1373 | 1396 | ||
1374 | mutex_unlock(&info->mutex); | 1397 | mutex_unlock(&info->mutex); |
1375 | 1398 | ||
1376 | out: | 1399 | out: |
1377 | mutex_unlock(&blkfront_mutex); | 1400 | mutex_unlock(&blkfront_mutex); |
1378 | return err; | 1401 | return err; |
1379 | } | 1402 | } |
1380 | 1403 | ||
1381 | static int blkif_release(struct gendisk *disk, fmode_t mode) | 1404 | static int blkif_release(struct gendisk *disk, fmode_t mode) |
1382 | { | 1405 | { |
1383 | struct blkfront_info *info = disk->private_data; | 1406 | struct blkfront_info *info = disk->private_data; |
1384 | struct block_device *bdev; | 1407 | struct block_device *bdev; |
1385 | struct xenbus_device *xbdev; | 1408 | struct xenbus_device *xbdev; |
1386 | 1409 | ||
1387 | mutex_lock(&blkfront_mutex); | 1410 | mutex_lock(&blkfront_mutex); |
1388 | 1411 | ||
1389 | bdev = bdget_disk(disk, 0); | 1412 | bdev = bdget_disk(disk, 0); |
1390 | bdput(bdev); | 1413 | bdput(bdev); |
1391 | 1414 | ||
1392 | if (bdev->bd_openers) | 1415 | if (bdev->bd_openers) |
1393 | goto out; | 1416 | goto out; |
1394 | 1417 | ||
1395 | /* | 1418 | /* |
1396 | * Check if we have been instructed to close. We will have | 1419 | * Check if we have been instructed to close. We will have |
1397 | * deferred this request, because the bdev was still open. | 1420 | * deferred this request, because the bdev was still open. |
1398 | */ | 1421 | */ |
1399 | 1422 | ||
1400 | mutex_lock(&info->mutex); | 1423 | mutex_lock(&info->mutex); |
1401 | xbdev = info->xbdev; | 1424 | xbdev = info->xbdev; |
1402 | 1425 | ||
1403 | if (xbdev && xbdev->state == XenbusStateClosing) { | 1426 | if (xbdev && xbdev->state == XenbusStateClosing) { |
1404 | /* pending switch to state closed */ | 1427 | /* pending switch to state closed */ |
1405 | dev_info(disk_to_dev(bdev->bd_disk), "releasing disk\n"); | 1428 | dev_info(disk_to_dev(bdev->bd_disk), "releasing disk\n"); |
1406 | xlvbd_release_gendisk(info); | 1429 | xlvbd_release_gendisk(info); |
1407 | xenbus_frontend_closed(info->xbdev); | 1430 | xenbus_frontend_closed(info->xbdev); |
1408 | } | 1431 | } |
1409 | 1432 | ||
1410 | mutex_unlock(&info->mutex); | 1433 | mutex_unlock(&info->mutex); |
1411 | 1434 | ||
1412 | if (!xbdev) { | 1435 | if (!xbdev) { |
1413 | /* sudden device removal */ | 1436 | /* sudden device removal */ |
1414 | dev_info(disk_to_dev(bdev->bd_disk), "releasing disk\n"); | 1437 | dev_info(disk_to_dev(bdev->bd_disk), "releasing disk\n"); |
1415 | xlvbd_release_gendisk(info); | 1438 | xlvbd_release_gendisk(info); |
1416 | disk->private_data = NULL; | 1439 | disk->private_data = NULL; |
1417 | kfree(info); | 1440 | kfree(info); |
1418 | } | 1441 | } |
1419 | 1442 | ||
1420 | out: | 1443 | out: |
1421 | mutex_unlock(&blkfront_mutex); | 1444 | mutex_unlock(&blkfront_mutex); |
1422 | return 0; | 1445 | return 0; |
1423 | } | 1446 | } |
1424 | 1447 | ||
1425 | static const struct block_device_operations xlvbd_block_fops = | 1448 | static const struct block_device_operations xlvbd_block_fops = |
1426 | { | 1449 | { |
1427 | .owner = THIS_MODULE, | 1450 | .owner = THIS_MODULE, |
1428 | .open = blkif_open, | 1451 | .open = blkif_open, |
1429 | .release = blkif_release, | 1452 | .release = blkif_release, |
1430 | .getgeo = blkif_getgeo, | 1453 | .getgeo = blkif_getgeo, |
1431 | .ioctl = blkif_ioctl, | 1454 | .ioctl = blkif_ioctl, |
1432 | }; | 1455 | }; |
1433 | 1456 | ||
1434 | 1457 | ||
1435 | static const struct xenbus_device_id blkfront_ids[] = { | 1458 | static const struct xenbus_device_id blkfront_ids[] = { |
1436 | { "vbd" }, | 1459 | { "vbd" }, |
1437 | { "" } | 1460 | { "" } |
1438 | }; | 1461 | }; |
1439 | 1462 | ||
1440 | static DEFINE_XENBUS_DRIVER(blkfront, , | 1463 | static DEFINE_XENBUS_DRIVER(blkfront, , |
1441 | .probe = blkfront_probe, | 1464 | .probe = blkfront_probe, |
1442 | .remove = blkfront_remove, | 1465 | .remove = blkfront_remove, |
1443 | .resume = blkfront_resume, | 1466 | .resume = blkfront_resume, |
1444 | .otherend_changed = blkback_changed, | 1467 | .otherend_changed = blkback_changed, |
1445 | .is_ready = blkfront_is_ready, | 1468 | .is_ready = blkfront_is_ready, |
1446 | ); | 1469 | ); |
1447 | 1470 | ||
1448 | static int __init xlblk_init(void) | 1471 | static int __init xlblk_init(void) |
1449 | { | 1472 | { |
1450 | int ret; | 1473 | int ret; |
1451 | 1474 | ||
1452 | if (!xen_domain()) | 1475 | if (!xen_domain()) |
1453 | return -ENODEV; | 1476 | return -ENODEV; |
1454 | 1477 | ||
1455 | if (register_blkdev(XENVBD_MAJOR, DEV_NAME)) { | 1478 | if (register_blkdev(XENVBD_MAJOR, DEV_NAME)) { |
1456 | printk(KERN_WARNING "xen_blk: can't get major %d with name %s\n", | 1479 | printk(KERN_WARNING "xen_blk: can't get major %d with name %s\n", |
1457 | XENVBD_MAJOR, DEV_NAME); | 1480 | XENVBD_MAJOR, DEV_NAME); |
1458 | return -ENODEV; | 1481 | return -ENODEV; |
1459 | } | 1482 | } |
1460 | 1483 | ||
1461 | ret = xenbus_register_frontend(&blkfront_driver); | 1484 | ret = xenbus_register_frontend(&blkfront_driver); |
1462 | if (ret) { | 1485 | if (ret) { |
1463 | unregister_blkdev(XENVBD_MAJOR, DEV_NAME); | 1486 | unregister_blkdev(XENVBD_MAJOR, DEV_NAME); |
1464 | return ret; | 1487 | return ret; |
1465 | } | 1488 | } |
1466 | 1489 | ||
1467 | return 0; | 1490 | return 0; |
1468 | } | 1491 | } |
1469 | module_init(xlblk_init); | 1492 | module_init(xlblk_init); |
1470 | 1493 | ||
1471 | 1494 | ||
1472 | static void __exit xlblk_exit(void) | 1495 | static void __exit xlblk_exit(void) |
1473 | { | 1496 | { |
1474 | return xenbus_unregister_driver(&blkfront_driver); | 1497 | return xenbus_unregister_driver(&blkfront_driver); |
1475 | } | 1498 | } |
1476 | module_exit(xlblk_exit); | 1499 | module_exit(xlblk_exit); |
1477 | 1500 | ||
1478 | MODULE_DESCRIPTION("Xen virtual block device frontend"); | 1501 | MODULE_DESCRIPTION("Xen virtual block device frontend"); |
1479 | MODULE_LICENSE("GPL"); | 1502 | MODULE_LICENSE("GPL"); |
1480 | MODULE_ALIAS_BLOCKDEV_MAJOR(XENVBD_MAJOR); | 1503 | MODULE_ALIAS_BLOCKDEV_MAJOR(XENVBD_MAJOR); |
1481 | MODULE_ALIAS("xen:vbd"); | 1504 | MODULE_ALIAS("xen:vbd"); |
1482 | MODULE_ALIAS("xenblk"); | 1505 | MODULE_ALIAS("xenblk"); |
include/xen/interface/io/blkif.h
1 | /****************************************************************************** | 1 | /****************************************************************************** |
2 | * blkif.h | 2 | * blkif.h |
3 | * | 3 | * |
4 | * Unified block-device I/O interface for Xen guest OSes. | 4 | * Unified block-device I/O interface for Xen guest OSes. |
5 | * | 5 | * |
6 | * Copyright (c) 2003-2004, Keir Fraser | 6 | * Copyright (c) 2003-2004, Keir Fraser |
7 | */ | 7 | */ |
8 | 8 | ||
9 | #ifndef __XEN_PUBLIC_IO_BLKIF_H__ | 9 | #ifndef __XEN_PUBLIC_IO_BLKIF_H__ |
10 | #define __XEN_PUBLIC_IO_BLKIF_H__ | 10 | #define __XEN_PUBLIC_IO_BLKIF_H__ |
11 | 11 | ||
12 | #include "ring.h" | 12 | #include "ring.h" |
13 | #include "../grant_table.h" | 13 | #include "../grant_table.h" |
14 | 14 | ||
15 | /* | 15 | /* |
16 | * Front->back notifications: When enqueuing a new request, sending a | 16 | * Front->back notifications: When enqueuing a new request, sending a |
17 | * notification can be made conditional on req_event (i.e., the generic | 17 | * notification can be made conditional on req_event (i.e., the generic |
18 | * hold-off mechanism provided by the ring macros). Backends must set | 18 | * hold-off mechanism provided by the ring macros). Backends must set |
19 | * req_event appropriately (e.g., using RING_FINAL_CHECK_FOR_REQUESTS()). | 19 | * req_event appropriately (e.g., using RING_FINAL_CHECK_FOR_REQUESTS()). |
20 | * | 20 | * |
21 | * Back->front notifications: When enqueuing a new response, sending a | 21 | * Back->front notifications: When enqueuing a new response, sending a |
22 | * notification can be made conditional on rsp_event (i.e., the generic | 22 | * notification can be made conditional on rsp_event (i.e., the generic |
23 | * hold-off mechanism provided by the ring macros). Frontends must set | 23 | * hold-off mechanism provided by the ring macros). Frontends must set |
24 | * rsp_event appropriately (e.g., using RING_FINAL_CHECK_FOR_RESPONSES()). | 24 | * rsp_event appropriately (e.g., using RING_FINAL_CHECK_FOR_RESPONSES()). |
25 | */ | 25 | */ |
26 | 26 | ||
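This hold-off mechanism is what the frontend relies on when pushing requests; a minimal sketch of the producer side, essentially what xen-blkfront does (info->ring and info->irq are the driver's state):

	/* Sketch: push queued requests; notify the backend only if it
	 * armed req_event, i.e. it actually went to sleep. */
	static inline void flush_requests(struct blkfront_info *info)
	{
		int notify;

		RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&info->ring, notify);
		if (notify)
			notify_remote_via_irq(info->irq);
	}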
27 | typedef uint16_t blkif_vdev_t; | 27 | typedef uint16_t blkif_vdev_t; |
28 | typedef uint64_t blkif_sector_t; | 28 | typedef uint64_t blkif_sector_t; |
29 | 29 | ||
30 | /* | 30 | /* |
31 | * REQUEST CODES. | 31 | * REQUEST CODES. |
32 | */ | 32 | */ |
33 | #define BLKIF_OP_READ 0 | 33 | #define BLKIF_OP_READ 0 |
34 | #define BLKIF_OP_WRITE 1 | 34 | #define BLKIF_OP_WRITE 1 |
35 | /* | 35 | /* |
36 | * Recognised only if "feature-barrier" is present in backend xenbus info. | 36 | * Recognised only if "feature-barrier" is present in backend xenbus info. |
37 | * The "feature_barrier" node contains a boolean indicating whether barrier | 37 | * The "feature_barrier" node contains a boolean indicating whether barrier |
38 | * requests are likely to succeed or fail. Either way, a barrier request | 38 | * requests are likely to succeed or fail. Either way, a barrier request |
39 | * may fail at any time with BLKIF_RSP_EOPNOTSUPP if it is unsupported by | 39 | * may fail at any time with BLKIF_RSP_EOPNOTSUPP if it is unsupported by |
40 | * the underlying block-device hardware. The boolean simply indicates whether | 40 | * the underlying block-device hardware. The boolean simply indicates whether |
41 | * or not it is worthwhile for the frontend to attempt barrier requests. | 41 | * or not it is worthwhile for the frontend to attempt barrier requests. |
42 | * If a backend does not recognise BLKIF_OP_WRITE_BARRIER, it should *not* | 42 | * If a backend does not recognise BLKIF_OP_WRITE_BARRIER, it should *not* |
43 | * create the "feature-barrier" node! | 43 | * create the "feature-barrier" node! |
44 | */ | 44 | */ |
45 | #define BLKIF_OP_WRITE_BARRIER 2 | 45 | #define BLKIF_OP_WRITE_BARRIER 2 |
46 | 46 | ||
47 | /* | 47 | /* |
48 | * Recognised if "feature-flush-cache" is present in backend xenbus | 48 | * Recognised if "feature-flush-cache" is present in backend xenbus |
49 | * info. A flush will ask the underlying storage hardware to flush its | 49 | * info. A flush will ask the underlying storage hardware to flush its |
50 | * non-volatile caches as appropriate. The "feature-flush-cache" node | 50 | * non-volatile caches as appropriate. The "feature-flush-cache" node |
51 | * contains a boolean indicating whether flush requests are likely to | 51 | * contains a boolean indicating whether flush requests are likely to |
52 | * succeed or fail. Either way, a flush request may fail at any time | 52 | * succeed or fail. Either way, a flush request may fail at any time |
53 | * with BLKIF_RSP_EOPNOTSUPP if it is unsupported by the underlying | 53 | * with BLKIF_RSP_EOPNOTSUPP if it is unsupported by the underlying |
54 | * block-device hardware. The boolean simply indicates whether or not it | 54 | * block-device hardware. The boolean simply indicates whether or not it |
55 | * is worthwhile for the frontend to attempt flushes. If a backend does | 55 | * is worthwhile for the frontend to attempt flushes. If a backend does |
56 | * not recognise BLKIF_OP_WRITE_FLUSH_CACHE, it should *not* create the | 56 | * not recognise BLKIF_OP_WRITE_FLUSH_CACHE, it should *not* create the |
57 | * "feature-flush-cache" node! | 57 | * "feature-flush-cache" node! |
58 | */ | 58 | */ |
59 | #define BLKIF_OP_FLUSH_DISKCACHE 3 | 59 | #define BLKIF_OP_FLUSH_DISKCACHE 3 |
60 | 60 | ||
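On the frontend side, a Linux flush/FUA request is mapped onto whichever operation was negotiated (BLKIF_OP_WRITE_BARRIER for old backends, BLKIF_OP_FLUSH_DISKCACHE when advertised). A sketch of that request-path step, with req and ring_req taken from the driver's queueing code:

	/* Sketch: translate a flush/FUA request to the negotiated op. */
	if (req->cmd_flags & (REQ_FLUSH | REQ_FUA))
		ring_req->operation = info->flush_op;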
61 | /* | 61 | /* |
62 | * Recognised only if "feature-discard" is present in backend xenbus info. | 62 | * Recognised only if "feature-discard" is present in backend xenbus info. |
63 | * The "feature-discard" node contains a boolean indicating whether trim | 63 | * The "feature-discard" node contains a boolean indicating whether trim |
64 | * (ATA) or unmap (SCSI) - conveniently called discard - requests are likely | 64 | * (ATA) or unmap (SCSI) - conveniently called discard - requests are likely |
65 | * to succeed or fail. Either way, a discard request | 65 | * to succeed or fail. Either way, a discard request |
66 | * may fail at any time with BLKIF_RSP_EOPNOTSUPP if it is unsupported by | 66 | * may fail at any time with BLKIF_RSP_EOPNOTSUPP if it is unsupported by |
67 | * the underlying block-device hardware. The boolean simply indicates whether | 67 | * the underlying block-device hardware. The boolean simply indicates whether |
68 | * or not it is worthwhile for the frontend to attempt discard requests. | 68 | * or not it is worthwhile for the frontend to attempt discard requests. |
69 | * If a backend does not recognise BLKIF_OP_DISCARD, it should *not* | 69 | * If a backend does not recognise BLKIF_OP_DISCARD, it should *not* |
70 | * create the "feature-discard" node! | 70 | * create the "feature-discard" node! |
71 | * | 71 | * |
72 | * Discard operation is a request for the underlying block device to mark | 72 | * Discard operation is a request for the underlying block device to mark |
73 | * extents to be erased. However, discard does not guarantee that the blocks | 73 | * extents to be erased. However, discard does not guarantee that the blocks |
74 | * will be erased from the device - it is just a hint to the device | 74 | * will be erased from the device - it is just a hint to the device |
75 | * controller that these blocks are no longer in use. What the device | 75 | * controller that these blocks are no longer in use. What the device |
76 | * controller does with that information is left to the controller. | 76 | * controller does with that information is left to the controller. |
77 | * Discard operations are passed with sector_number as the | 77 | * Discard operations are passed with sector_number as the |
78 | * sector index to begin discard operations at and nr_sectors as the number of | 78 | * sector index to begin discard operations at and nr_sectors as the number of |
79 | * sectors to be discarded. The specified sectors should be discarded if the | 79 | * sectors to be discarded. The specified sectors should be discarded if the |
80 | * underlying block device supports trim (ATA) or unmap (SCSI) operations, | 80 | * underlying block device supports trim (ATA) or unmap (SCSI) operations, |
81 | * or a BLKIF_RSP_EOPNOTSUPP should be returned. | 81 | * or a BLKIF_RSP_EOPNOTSUPP should be returned. |
82 | * More information about trim/unmap operations at: | 82 | * More information about trim/unmap operations at: |
83 | * http://t13.org/Documents/UploadedDocuments/docs2008/ | 83 | * http://t13.org/Documents/UploadedDocuments/docs2008/ |
84 | * e07154r6-Data_Set_Management_Proposal_for_ATA-ACS2.doc | 84 | * e07154r6-Data_Set_Management_Proposal_for_ATA-ACS2.doc |
85 | * http://www.seagate.com/staticfiles/support/disc/manuals/ | 85 | * http://www.seagate.com/staticfiles/support/disc/manuals/ |
86 | * Interface%20manuals/100293068c.pdf | 86 | * Interface%20manuals/100293068c.pdf |
87 | * The backend can optionally provide three extra XenBus attributes to | ||
88 | * further optimize the discard functionality: | ||
89 | * 'discard-alignment' - Devices that support discard functionality may | ||
90 | * internally allocate space in units that are bigger than the exported | ||
91 | * logical block size. The discard-alignment parameter indicates how many bytes | ||
92 | * the beginning of the partition is offset from the internal allocation unit's | ||
93 | * natural alignment. | ||
94 | * 'discard-granularity' - Devices that support discard functionality may | ||
95 | * internally allocate space using units that are bigger than the logical block | ||
96 | * size. The discard-granularity parameter indicates the size of the internal | ||
97 | * allocation unit in bytes if reported by the device. Otherwise the | ||
98 | * discard-granularity will be set to match the device's physical block size. | ||
99 | * 'discard-secure' - All copies of the discarded sectors (potentially created | ||
100 | * by garbage collection) must also be erased. To use this feature, the flag | ||
101 | * BLKIF_DISCARD_SECURE must be set in the blkif_request_discard. | ||
87 | */ | 102 | */ |
88 | #define BLKIF_OP_DISCARD 5 | 103 | #define BLKIF_OP_DISCARD 5 |
89 | 104 | ||
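A hedged sketch of a frontend issuing a (secure) discard with the blkif_request_discard layout defined below; ring_req, id, req and info are the issuing driver's own state, and REQ_SECURE is the Linux block-layer flag:

	/* Sketch: fill in a discard request; fall back to a plain
	 * discard when the backend did not advertise discard-secure. */
	ring_req->operation = BLKIF_OP_DISCARD;
	ring_req->u.discard.id = id;
	ring_req->u.discard.sector_number = (blkif_sector_t)blk_rq_pos(req);
	ring_req->u.discard.nr_sectors = blk_rq_sectors(req);
	if ((req->cmd_flags & REQ_SECURE) && info->feature_secdiscard)
		ring_req->u.discard.flag = BLKIF_DISCARD_SECURE;
	else
		ring_req->u.discard.flag = 0;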
90 | /* | 105 | /* |
91 | * Maximum scatter/gather segments per request. | 106 | * Maximum scatter/gather segments per request. |
92 | * This is carefully chosen so that sizeof(struct blkif_ring) <= PAGE_SIZE. | 107 | * This is carefully chosen so that sizeof(struct blkif_ring) <= PAGE_SIZE. |
93 | * NB. This could be 12 if the ring indexes weren't stored in the same page. | 108 | * NB. This could be 12 if the ring indexes weren't stored in the same page. |
94 | */ | 109 | */ |
95 | #define BLKIF_MAX_SEGMENTS_PER_REQUEST 11 | 110 | #define BLKIF_MAX_SEGMENTS_PER_REQUEST 11 |
96 | 111 | ||
97 | struct blkif_request_rw { | 112 | struct blkif_request_rw { |
113 | uint8_t nr_segments; /* number of segments */ | ||
114 | blkif_vdev_t handle; /* only for read/write requests */ | ||
115 | #ifdef CONFIG_X86_64 | ||
116 | uint32_t _pad1; /* offsetof(blkif_request,u.rw.id) == 8 */ | ||
117 | #endif | ||
118 | uint64_t id; /* private guest value, echoed in resp */ | ||
98 | blkif_sector_t sector_number;/* start sector idx on disk (r/w only) */ | 119 | blkif_sector_t sector_number;/* start sector idx on disk (r/w only) */ |
99 | struct blkif_request_segment { | 120 | struct blkif_request_segment { |
100 | grant_ref_t gref; /* reference to I/O buffer frame */ | 121 | grant_ref_t gref; /* reference to I/O buffer frame */ |
101 | /* @first_sect: first sector in frame to transfer (inclusive). */ | 122 | /* @first_sect: first sector in frame to transfer (inclusive). */ |
102 | /* @last_sect: last sector in frame to transfer (inclusive). */ | 123 | /* @last_sect: last sector in frame to transfer (inclusive). */ |
103 | uint8_t first_sect, last_sect; | 124 | uint8_t first_sect, last_sect; |
104 | } seg[BLKIF_MAX_SEGMENTS_PER_REQUEST]; | 125 | } seg[BLKIF_MAX_SEGMENTS_PER_REQUEST]; |
105 | }; | 126 | } __attribute__((__packed__)); |
106 | 127 | ||
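For comparison, a condensed sketch of how a read/write request is built from this structure (grant setup is omitted; gref, fsect and lsect would describe the granted page and sector span of each segment):

	struct scatterlist *sg;
	int i;

	ring_req->operation = rq_data_dir(req) ? BLKIF_OP_WRITE : BLKIF_OP_READ;
	ring_req->u.rw.id = id;
	ring_req->u.rw.handle = info->handle;
	ring_req->u.rw.sector_number = (blkif_sector_t)blk_rq_pos(req);
	ring_req->u.rw.nr_segments = blk_rq_map_sg(req->q, req, info->sg);
	for_each_sg(info->sg, sg, ring_req->u.rw.nr_segments, i) {
		/* gref/fsect/lsect come from granting this page. */
		ring_req->u.rw.seg[i] = (struct blkif_request_segment) {
			.gref       = gref,
			.first_sect = fsect,
			.last_sect  = lsect };
	}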
107 | struct blkif_request_discard { | 128 | struct blkif_request_discard { |
129 | uint8_t flag; /* BLKIF_DISCARD_SECURE or zero. */ | ||
130 | #define BLKIF_DISCARD_SECURE (1<<0) /* ignored if discard-secure=0 */ | ||
131 | blkif_vdev_t _pad1; /* only for read/write requests */ | ||
132 | #ifdef CONFIG_X86_64 | ||
133 | uint32_t _pad2; /* offsetof(blkif_req..,u.discard.id)==8*/ | ||
134 | #endif | ||
135 | uint64_t id; /* private guest value, echoed in resp */ | ||
108 | blkif_sector_t sector_number; | 136 | blkif_sector_t sector_number; |
109 | uint64_t nr_sectors; | 137 | uint64_t nr_sectors; |
110 | }; | 138 | uint8_t _pad3; |
139 | } __attribute__((__packed__)); | ||
111 | 140 | ||
112 | struct blkif_request { | 141 | struct blkif_request { |
113 | uint8_t operation; /* BLKIF_OP_??? */ | 142 | uint8_t operation; /* BLKIF_OP_??? */ |
114 | uint8_t nr_segments; /* number of segments */ | ||
115 | blkif_vdev_t handle; /* only for read/write requests */ | ||
116 | uint64_t id; /* private guest value, echoed in resp */ | ||
117 | union { | 143 | union { |
118 | struct blkif_request_rw rw; | 144 | struct blkif_request_rw rw; |
119 | struct blkif_request_discard discard; | 145 | struct blkif_request_discard discard; |
120 | } u; | 146 | } u; |
121 | }; | 147 | } __attribute__((__packed__)); |
122 | 148 | ||
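The _pad fields and __packed__ attributes exist purely to pin this layout down so 32- and 64-bit guests agree on it. An illustrative compile-time check (not part of this header; BUILD_BUG_ON must sit inside a function, e.g. a driver init path):

	/* Sketch: both request variants keep 'id' at the same offset,
	 * and at byte 8 on x86_64 as the comments above require. */
	BUILD_BUG_ON(offsetof(struct blkif_request, u.rw.id) !=
		     offsetof(struct blkif_request, u.discard.id));
#ifdef CONFIG_X86_64
	BUILD_BUG_ON(offsetof(struct blkif_request, u.rw.id) != 8);
#endif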
123 | struct blkif_response { | 149 | struct blkif_response { |
124 | uint64_t id; /* copied from request */ | 150 | uint64_t id; /* copied from request */ |
125 | uint8_t operation; /* copied from request */ | 151 | uint8_t operation; /* copied from request */ |
126 | int16_t status; /* BLKIF_RSP_??? */ | 152 | int16_t status; /* BLKIF_RSP_??? */ |
127 | }; | 153 | }; |
128 | 154 | ||
129 | /* | 155 | /* |
130 | * STATUS RETURN CODES. | 156 | * STATUS RETURN CODES. |
131 | */ | 157 | */ |
132 | /* Operation not supported (only happens on barrier writes). */ | 158 | /* Operation not supported (only happens on barrier writes). */ |
133 | #define BLKIF_RSP_EOPNOTSUPP -2 | 159 | #define BLKIF_RSP_EOPNOTSUPP -2 |
134 | /* Operation failed for some unspecified reason (-EIO). */ | 160 | /* Operation failed for some unspecified reason (-EIO). */ |
135 | #define BLKIF_RSP_ERROR -1 | 161 | #define BLKIF_RSP_ERROR -1 |
136 | /* Operation completed successfully. */ | 162 | /* Operation completed successfully. */ |
137 | #define BLKIF_RSP_OKAY 0 | 163 | #define BLKIF_RSP_OKAY 0 |
138 | 164 | ||
139 | /* | 165 | /* |
140 | * Generate blkif ring structures and types. | 166 | * Generate blkif ring structures and types. |
141 | */ | 167 | */ |
142 | 168 | ||
143 | DEFINE_RING_TYPES(blkif, struct blkif_request, struct blkif_response); | 169 | DEFINE_RING_TYPES(blkif, struct blkif_request, struct blkif_response); |
144 | 170 | ||
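DEFINE_RING_TYPES() expands to struct blkif_sring (the shared-page layout) plus blkif_front_ring and blkif_back_ring views onto it. A sketch of the frontend bringing its ring up, modelled on this driver's setup path:

	/* Sketch: allocate and initialise the shared ring, frontend side. */
	struct blkif_sring *sring;

	sring = (struct blkif_sring *)__get_free_page(GFP_NOIO | __GFP_HIGH);
	if (!sring)
		return -ENOMEM;
	SHARED_RING_INIT(sring);
	FRONT_RING_INIT(&info->ring, sring, PAGE_SIZE);
	/* The page is then granted to the backend, and the grant ref
	 * and event channel are advertised via xenstore. */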
145 | #define VDISK_CDROM 0x1 | 171 | #define VDISK_CDROM 0x1 |
146 | #define VDISK_REMOVABLE 0x2 | 172 | #define VDISK_REMOVABLE 0x2 |
147 | #define VDISK_READONLY 0x4 | 173 | #define VDISK_READONLY 0x4 |
148 | 174 | ||
149 | /* Xen-defined major numbers for virtual disks, they look strangely | 175 | /* Xen-defined major numbers for virtual disks, they look strangely |
150 | * familiar */ | 176 | * familiar */ |
151 | #define XEN_IDE0_MAJOR 3 | 177 | #define XEN_IDE0_MAJOR 3 |
152 | #define XEN_IDE1_MAJOR 22 | 178 | #define XEN_IDE1_MAJOR 22 |
153 | #define XEN_SCSI_DISK0_MAJOR 8 | 179 | #define XEN_SCSI_DISK0_MAJOR 8 |
154 | #define XEN_SCSI_DISK1_MAJOR 65 | 180 | #define XEN_SCSI_DISK1_MAJOR 65 |
155 | #define XEN_SCSI_DISK2_MAJOR 66 | 181 | #define XEN_SCSI_DISK2_MAJOR 66 |
156 | #define XEN_SCSI_DISK3_MAJOR 67 | 182 | #define XEN_SCSI_DISK3_MAJOR 67 |
157 | #define XEN_SCSI_DISK4_MAJOR 68 | 183 | #define XEN_SCSI_DISK4_MAJOR 68 |
158 | #define XEN_SCSI_DISK5_MAJOR 69 | 184 | #define XEN_SCSI_DISK5_MAJOR 69 |
159 | #define XEN_SCSI_DISK6_MAJOR 70 | 185 | #define XEN_SCSI_DISK6_MAJOR 70 |
160 | #define XEN_SCSI_DISK7_MAJOR 71 | 186 | #define XEN_SCSI_DISK7_MAJOR 71 |
161 | #define XEN_SCSI_DISK8_MAJOR 128 | 187 | #define XEN_SCSI_DISK8_MAJOR 128 |
162 | #define XEN_SCSI_DISK9_MAJOR 129 | 188 | #define XEN_SCSI_DISK9_MAJOR 129 |
163 | #define XEN_SCSI_DISK10_MAJOR 130 | 189 | #define XEN_SCSI_DISK10_MAJOR 130 |
164 | #define XEN_SCSI_DISK11_MAJOR 131 | 190 | #define XEN_SCSI_DISK11_MAJOR 131 |
165 | #define XEN_SCSI_DISK12_MAJOR 132 | 191 | #define XEN_SCSI_DISK12_MAJOR 132 |
166 | #define XEN_SCSI_DISK13_MAJOR 133 | 192 | #define XEN_SCSI_DISK13_MAJOR 133 |
167 | #define XEN_SCSI_DISK14_MAJOR 134 | 193 | #define XEN_SCSI_DISK14_MAJOR 134 |
168 | #define XEN_SCSI_DISK15_MAJOR 135 | 194 | #define XEN_SCSI_DISK15_MAJOR 135 |