Commit 16008d641670571ff4cd750b416c7caf2d89f467

Authored by Linus Torvalds

Merge branch 'for-3.3/drivers' of git://git.kernel.dk/linux-block

* 'for-3.3/drivers' of git://git.kernel.dk/linux-block:
  mtip32xx: do rebuild monitoring asynchronously
  xen-blkfront: Use kcalloc instead of kzalloc to allocate array
  mtip32xx: uninitialized variable in mtip_quiesce_io()
  mtip32xx: updates based on feedback
  xen-blkback: convert hole punching to discard request on loop devices
  xen/blkback: Move processing of BLKIF_OP_DISCARD from dispatch_rw_block_io
  xen/blk[front|back]: Enhance discard support with secure erasing support.
  xen/blk[front|back]: Squash blkif_request_rw and blkif_request_discard together
  mtip32xx: update to new ->make_request() API
  mtip32xx: add module.h include to avoid conflict with moduleh tree
  mtip32xx: mark a few more items static
  mtip32xx: ensure that all local functions are static
  mtip32xx: cleanup compat ioctl handling
  mtip32xx: fix warnings/errors on 32-bit compiles
  block: Add driver for Micron RealSSD pcie flash cards

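One of the smaller changes listed above, "xen-blkfront: Use kcalloc instead of kzalloc to allocate array", follows a common kernel hardening pattern: kcalloc(n, size, flags) fails cleanly if n * size would overflow, which an open-coded kzalloc(n * size, flags) does not detect. A minimal sketch of the pattern (hypothetical helper, not the actual xen-blkfront code):

#include <linux/slab.h>
#include <linux/types.h>

/* Hypothetical helper: allocate a zeroed array of nents 64-bit entries. */
static u64 *alloc_entry_array(unsigned int nents)
{
	/* kcalloc() returns NULL if nents * sizeof(u64) would overflow ... */
	return kcalloc(nents, sizeof(u64), GFP_KERNEL);
	/* ... whereas kzalloc(nents * sizeof(u64), GFP_KERNEL) would wrap silently. */
}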
Showing 11 changed files

drivers/block/Kconfig
1 #
2 # Block device driver configuration
3 #
4
5 menuconfig BLK_DEV
6 bool "Block devices"
7 depends on BLOCK
8 default y
9 ---help---
10 Say Y here to get to see options for various different block device
11 drivers. This option alone does not add any kernel code.
12
13 If you say N, all options in this submenu will be skipped and disabled;
14 only do this if you know what you are doing.
15
16 if BLK_DEV
17
18 config BLK_DEV_FD
19 tristate "Normal floppy disk support"
20 depends on ARCH_MAY_HAVE_PC_FDC
21 ---help---
22 If you want to use the floppy disk drive(s) of your PC under Linux,
23 say Y. Information about this driver, especially important for IBM
24 Thinkpad users, is contained in
25 <file:Documentation/blockdev/floppy.txt>.
26 That file also contains the location of the Floppy driver FAQ as
27 well as location of the fdutils package used to configure additional
28 parameters of the driver at run time.
29
30 To compile this driver as a module, choose M here: the
31 module will be called floppy.
32
33 config AMIGA_FLOPPY
34 tristate "Amiga floppy support"
35 depends on AMIGA
36
37 config ATARI_FLOPPY
38 tristate "Atari floppy support"
39 depends on ATARI
40
41 config MAC_FLOPPY
42 tristate "Support for PowerMac floppy"
43 depends on PPC_PMAC && !PPC_PMAC64
44 help
45 If you have a SWIM-3 (Super Woz Integrated Machine 3; from Apple)
46 floppy controller, say Y here. Most commonly found in PowerMacs.
47
48 config BLK_DEV_SWIM
49 tristate "Support for SWIM Macintosh floppy"
50 depends on M68K && MAC
51 help
52 You should select this option if you want floppy support
53 and you don't have a II, IIfx, Q900, Q950 or AV series.
54
55 config AMIGA_Z2RAM
56 tristate "Amiga Zorro II ramdisk support"
57 depends on ZORRO
58 help
59 This enables support for using Chip RAM and Zorro II RAM as a
60 ramdisk or as a swap partition. Say Y if you want to include this
61 driver in the kernel.
62
63 To compile this driver as a module, choose M here: the
64 module will be called z2ram.
65
66 config BLK_DEV_XD
67 tristate "XT hard disk support"
68 depends on ISA && ISA_DMA_API
69 select CHECK_SIGNATURE
70 help
71 Very old 8 bit hard disk controllers used in the IBM XT computer
72 will be supported if you say Y here.
73
74 To compile this driver as a module, choose M here: the
75 module will be called xd.
76
77 It's pretty unlikely that you have one of these: say N.
78
79 config GDROM
80 tristate "SEGA Dreamcast GD-ROM drive"
81 depends on SH_DREAMCAST
82 help
83 A standard SEGA Dreamcast comes with a modified CD ROM drive called a
84 "GD-ROM" by SEGA to signify it is capable of reading special disks
85 with up to 1 GB of data. This drive will also read standard CD ROM
86 disks. Select this option to access any disks in your GD ROM drive.
87 Most users will want to say "Y" here.
88 You can also build this as a module which will be called gdrom.
89
90 config PARIDE
91 tristate "Parallel port IDE device support"
92 depends on PARPORT_PC
93 ---help---
94 There are many external CD-ROM and disk devices that connect through
95 your computer's parallel port. Most of them are actually IDE devices
96 using a parallel port IDE adapter. This option enables the PARIDE
97 subsystem which contains drivers for many of these external drives.
98 Read <file:Documentation/blockdev/paride.txt> for more information.
99
100 If you have said Y to the "Parallel-port support" configuration
101 option, you may share a single port between your printer and other
102 parallel port devices. Answer Y to build PARIDE support into your
103 kernel, or M if you would like to build it as a loadable module. If
104 your parallel port support is in a loadable module, you must build
105 PARIDE as a module. If you built PARIDE support into your kernel,
106 you may still build the individual protocol modules and high-level
107 drivers as loadable modules. If you build this support as a module,
108 it will be called paride.
109
110 To use the PARIDE support, you must say Y or M here and also to at
111 least one high-level driver (e.g. "Parallel port IDE disks",
112 "Parallel port ATAPI CD-ROMs", "Parallel port ATAPI disks" etc.) and
113 to at least one protocol driver (e.g. "ATEN EH-100 protocol",
114 "MicroSolutions backpack protocol", "DataStor Commuter protocol"
115 etc.).
116
117 source "drivers/block/paride/Kconfig"
118
119 source "drivers/block/mtip32xx/Kconfig"
120
121 config BLK_CPQ_DA
122 tristate "Compaq SMART2 support"
123 depends on PCI && VIRT_TO_BUS
124 help
125 This is the driver for Compaq Smart Array controllers. Everyone
126 using these boards should say Y here. See the file
127 <file:Documentation/blockdev/cpqarray.txt> for the current list of
128 boards supported by this driver, and for further information on the
129 use of this driver.
130
131 config BLK_CPQ_CISS_DA
132 tristate "Compaq Smart Array 5xxx support"
133 depends on PCI
134 help
135 This is the driver for Compaq Smart Array 5xxx controllers.
136 Everyone using these boards should say Y here.
137 See <file:Documentation/blockdev/cciss.txt> for the current list of
138 boards supported by this driver, and for further information
139 on the use of this driver.
140
141 config CISS_SCSI_TAPE
142 bool "SCSI tape drive support for Smart Array 5xxx"
143 depends on BLK_CPQ_CISS_DA && PROC_FS
144 depends on SCSI=y || SCSI=BLK_CPQ_CISS_DA
145 help
146 When enabled (Y), this option allows SCSI tape drives and SCSI medium
147 changers (tape robots) to be accessed via a Compaq 5xxx array
148 controller. (See <file:Documentation/blockdev/cciss.txt> for more details.)
149
150 "SCSI support" and "SCSI tape support" must also be enabled for this
151 option to work.
152
153 When this option is disabled (N), the SCSI portion of the driver
154 is not compiled.
155
156 config BLK_DEV_DAC960
157 tristate "Mylex DAC960/DAC1100 PCI RAID Controller support"
158 depends on PCI
159 help
160 This driver adds support for the Mylex DAC960, AcceleRAID, and
161 eXtremeRAID PCI RAID controllers. See the file
162 <file:Documentation/blockdev/README.DAC960> for further information
163 about this driver.
164
165 To compile this driver as a module, choose M here: the
166 module will be called DAC960.
167
168 config BLK_DEV_UMEM
169 tristate "Micro Memory MM5415 Battery Backed RAM support (EXPERIMENTAL)"
170 depends on PCI && EXPERIMENTAL
171 ---help---
172 Saying Y here will include support for the MM5415 family of
173 battery backed (Non-volatile) RAM cards.
174 <http://www.umem.com/>
175
176 The cards appear as block devices that can be partitioned into
177 as many as 15 partitions.
178
179 To compile this driver as a module, choose M here: the
180 module will be called umem.
181
182 The umem driver has not yet been allocated a MAJOR number, so
183 one is chosen dynamically.
184
185 config BLK_DEV_UBD
186 bool "Virtual block device"
187 depends on UML
188 ---help---
189 The User-Mode Linux port includes a driver called UBD which will let
190 you access arbitrary files on the host computer as block devices.
191 Unless you know that you do not need such virtual block devices say
192 Y here.
193
194 config BLK_DEV_UBD_SYNC
195 bool "Always do synchronous disk IO for UBD"
196 depends on BLK_DEV_UBD
197 ---help---
198 Writes to the virtual block device are not immediately written to the
199 host's disk; this may cause problems if, for example, the User-Mode
200 Linux 'Virtual Machine' uses a journalling filesystem and the host
201 computer crashes.
202
203 Synchronous operation (i.e. always writing data to the host's disk
204 immediately) is configurable on a per-UBD basis by using a special
205 kernel command line option. Alternatively, you can say Y here to
206 turn on synchronous operation by default for all block devices.
207
208 If you're running a journalling file system (like reiserfs, for
209 example) in your virtual machine, you will want to say Y here. If
210 you care for the safety of the data in your virtual machine, Y is a
211 wise choice too. In all other cases (for example, if you're just
212 playing around with User-Mode Linux) you can choose N.
213
214 config BLK_DEV_COW_COMMON
215 bool
216 default BLK_DEV_UBD
217
218 config BLK_DEV_LOOP
219 tristate "Loopback device support"
220 ---help---
221 Saying Y here will allow you to use a regular file as a block
222 device; you can then create a file system on that block device and
223 mount it just as you would mount other block devices such as hard
224 drive partitions, CD-ROM drives or floppy drives. The loop devices
225 are block special device files with major number 7 and typically
226 called /dev/loop0, /dev/loop1 etc.
227
228 This is useful if you want to check an ISO 9660 file system before
229 burning the CD, or if you want to use floppy images without first
230 writing them to floppy. Furthermore, some Linux distributions avoid
231 the need for a dedicated Linux partition by keeping their complete
232 root file system inside a DOS FAT file using this loop device
233 driver.
234
235 To use the loop device, you need the losetup utility, found in the
236 util-linux package, see
237 <ftp://ftp.kernel.org/pub/linux/utils/util-linux/>.
238
239 The loop device driver can also be used to "hide" a file system in
240 a disk partition, floppy, or regular file, either using encryption
241 (scrambling the data) or steganography (hiding the data in the low
242 bits of, say, a sound file). This is also safe if the file resides
243 on a remote file server.
244
245 There are several ways of encrypting disks. Some of these require
246 kernel patches. The vanilla kernel offers the cryptoloop option
247 and a Device Mapper target (which is superior, as it supports all
248 file systems). If you want to use the cryptoloop, say Y to both
249 LOOP and CRYPTOLOOP, and make sure you have a recent (version 2.12
250 or later) version of util-linux. Additionally, be aware that
251 the cryptoloop is not safe for storing journaled filesystems.
252
253 Note that this loop device has nothing to do with the loopback
254 device used for network connections from the machine to itself.
255
256 To compile this driver as a module, choose M here: the
257 module will be called loop.
258
259 Most users will answer N here.
260
261 config BLK_DEV_LOOP_MIN_COUNT
262 int "Number of loop devices to pre-create at init time"
263 depends on BLK_DEV_LOOP
264 default 8
265 help
266 Static number of loop devices to be unconditionally pre-created
267 at init time.
268
269 This default value can be overwritten on the kernel command
270 line or with module-parameter loop.max_loop.
271
272 The historic default is 8. If a late 2011 version of losetup(8)
273 is used, it can be set to 0, since needed loop devices can be
274 dynamically allocated with the /dev/loop-control interface.
275
276 config BLK_DEV_CRYPTOLOOP
277 tristate "Cryptoloop Support"
278 select CRYPTO
279 select CRYPTO_CBC
280 depends on BLK_DEV_LOOP
281 ---help---
282 Say Y here if you want to be able to use the ciphers that are
283 provided by the CryptoAPI as loop transformation. This might be
284 used as hard disk encryption.
285
286 WARNING: This device is not safe for journaled file systems like
287 ext3 or Reiserfs. Please use the Device Mapper crypto module
288 instead, which can be configured to be on-disk compatible with the
289 cryptoloop device.
290
291 source "drivers/block/drbd/Kconfig"
292
293 config BLK_DEV_NBD
294 tristate "Network block device support"
295 depends on NET
296 ---help---
297 Saying Y here will allow your computer to be a client for network
298 block devices, i.e. it will be able to use block devices exported by
299 servers (mount file systems on them etc.). Communication between
300 client and server works over TCP/IP networking, but to the client
301 program this is hidden: it looks like a regular local file access to
302 a block device special file such as /dev/nd0.
303
304 Network block devices also allow you to run a block device in
305 userland (making server and client physically the same computer,
306 communicating using the loopback network device).
307
308 Read <file:Documentation/blockdev/nbd.txt> for more information,
309 especially about where to find the server code, which runs in user
310 space and does not need special kernel support.
311
312 Note that this has nothing to do with the network file systems NFS
313 or Coda; you can say N here even if you intend to use NFS or Coda.
314
315 To compile this driver as a module, choose M here: the
316 module will be called nbd.
317
318 If unsure, say N.
319
320 config BLK_DEV_OSD
321 tristate "OSD object-as-blkdev support"
322 depends on SCSI_OSD_ULD
323 ---help---
324 Saying Y or M here will allow the exporting of a single SCSI
325 OSD (object-based storage) object as a Linux block device.
326
327 For example, if you create a 2G object on an OSD device,
328 you can then use this module to present that 2G object as
329 a Linux block device.
330
331 To compile this driver as a module, choose M here: the
332 module will be called osdblk.
333
334 If unsure, say N.
335
336 config BLK_DEV_SX8
337 tristate "Promise SATA SX8 support"
338 depends on PCI
339 ---help---
340 Saying Y or M here will enable support for the
341 Promise SATA SX8 controllers.
342
343 Use devices /dev/sx8/$N and /dev/sx8/$Np$M.
344
345 config BLK_DEV_UB
346 tristate "Low Performance USB Block driver"
347 depends on USB
348 help
349 This driver supports certain USB attached storage devices
350 such as flash keys.
351
352 If you enable this driver, it is recommended to avoid conflicts
353 with usb-storage by enabling USB_LIBUSUAL.
354
355 If unsure, say N.
356
357 config BLK_DEV_RAM
358 tristate "RAM block device support"
359 ---help---
360 Saying Y here will allow you to use a portion of your RAM memory as
361 a block device, so that you can make file systems on it, read and
362 write to it and do all the other things that you can do with normal
363 block devices (such as hard drives). It is usually used to load and
364 store a copy of a minimal root file system off of a floppy into RAM
365 during the initial install of Linux.
366
367 Note that the kernel command line option "ramdisk=XX" is now obsolete.
368 For details, read <file:Documentation/blockdev/ramdisk.txt>.
369
370 To compile this driver as a module, choose M here: the
371 module will be called rd.
372
373 Most normal users won't need the RAM disk functionality, and can
374 thus say N here.
375
376 config BLK_DEV_RAM_COUNT
377 int "Default number of RAM disks"
378 default "16"
379 depends on BLK_DEV_RAM
380 help
381 The default value is 16 RAM disks. Change this if you know what you
382 are doing. If you boot from a filesystem that needs to be extracted
383 in memory, you will need at least one RAM disk (e.g. root on cramfs).
384
385 config BLK_DEV_RAM_SIZE
386 int "Default RAM disk size (kbytes)"
387 depends on BLK_DEV_RAM
388 default "4096"
389 help
390 The default value is 4096 kilobytes. Only change this if you know
391 what you are doing.
392
393 config BLK_DEV_XIP
394 bool "Support XIP filesystems on RAM block device"
395 depends on BLK_DEV_RAM
396 default n
397 help
398 Support XIP filesystems (such as ext2 with XIP support on) on
399 top of block ram device. This will slightly enlarge the kernel, and
400 will prevent RAM block device backing store memory from being
401 allocated from highmem (only a problem for highmem systems).
402
403 config CDROM_PKTCDVD
404 tristate "Packet writing on CD/DVD media"
405 depends on !UML
406 help
407 If you have a CDROM/DVD drive that supports packet writing, say
408 Y to include support. It should work with any MMC/Mt Fuji
409 compliant ATAPI or SCSI drive, which is just about any newer
410 DVD/CD writer.
411
412 Currently only writing to CD-RW, DVD-RW, DVD+RW and DVDRAM discs
413 is possible.
414 DVD-RW disks must be in restricted overwrite mode.
415
416 See the file <file:Documentation/cdrom/packet-writing.txt>
417 for further information on the use of this driver.
418
419 To compile this driver as a module, choose M here: the
420 module will be called pktcdvd.
421
422 config CDROM_PKTCDVD_BUFFERS
423 int "Free buffers for data gathering"
424 depends on CDROM_PKTCDVD
425 default "8"
426 help
427 This controls the maximum number of active concurrent packets. More
428 concurrent packets can increase write performance, but also require
429 more memory. Each concurrent packet will require approximately 64Kb
430 of non-swappable kernel memory, memory which will be allocated when
431 a disc is opened for writing.
432
433 config CDROM_PKTCDVD_WCACHE
434 bool "Enable write caching (EXPERIMENTAL)"
435 depends on CDROM_PKTCDVD && EXPERIMENTAL
436 help
437 If enabled, write caching will be set for the CD-R/W device. For now
438 this option is dangerous unless the CD-RW media is known good, as we
439 don't do deferred write error handling yet.
440
441 config ATA_OVER_ETH
442 tristate "ATA over Ethernet support"
443 depends on NET
444 help
445 This driver provides support for ATA over Ethernet block
446 devices like the Coraid EtherDrive (R) Storage Blade.
447
448 config MG_DISK
449 tristate "mGine mflash, gflash support"
450 depends on ARM && GPIOLIB
451 help
452 mGine mFlash(gFlash) block device driver
453
454 config MG_DISK_RES
455 int "Size of reserved area before MBR"
456 depends on MG_DISK
457 default 0
458 help
459 Define the size of the reserved area that is usually used for boot, in KB.
460 All block device operations will use this value as the start
461 offset.
462 Examples:
463 1024 => 1 MB
464
465 config SUNVDC
466 tristate "Sun Virtual Disk Client support"
467 depends on SUN_LDOMS
468 help
469 Support for virtual disk devices as a client under Sun
470 Logical Domains.
471
472 source "drivers/s390/block/Kconfig"
473
474 config XILINX_SYSACE
475 tristate "Xilinx SystemACE support"
476 depends on 4xx || MICROBLAZE
477 help
478 Include support for the Xilinx SystemACE CompactFlash interface
479
480 config XEN_BLKDEV_FRONTEND
481 tristate "Xen virtual block device support"
482 depends on XEN
483 default y
484 select XEN_XENBUS_FRONTEND
485 help
486 This driver implements the front-end of the Xen virtual
487 block device driver. It communicates with a back-end driver
488 in another domain which drives the actual block device.
489
490 config XEN_BLKDEV_BACKEND
491 tristate "Xen block-device backend driver"
492 depends on XEN_BACKEND
493 help
494 The block-device backend driver allows the kernel to export its
495 block devices to other guests via a high-performance shared-memory
496 interface.
497
498 The corresponding Linux frontend driver is enabled by the
499 CONFIG_XEN_BLKDEV_FRONTEND configuration option.
500
501 The backend driver attaches itself to any block device specified
502 in the XenBus configuration. There are no limits on what the block
503 device can be, as long as it has a major and minor number.
504
505 If you are compiling a kernel to run in a Xen block backend driver
506 domain (often this is domain 0) you should say Y here. To
507 compile this driver as a module, choose M here: the module
508 will be called xen-blkback.
509
510
511 config VIRTIO_BLK
512 tristate "Virtio block driver (EXPERIMENTAL)"
513 depends on EXPERIMENTAL && VIRTIO
514 ---help---
515 This is the virtual block driver for virtio. It can be used with
516 lguest or QEMU based VMMs (like KVM or Xen). Say Y or M.
517
518 config BLK_DEV_HD
519 bool "Very old hard disk (MFM/RLL/IDE) driver"
520 depends on HAVE_IDE
521 depends on !ARM || ARCH_RPC || ARCH_SHARK || BROKEN
522 help
523 This is a very old hard disk driver that lacks the enhanced
524 functionality of the newer ones.
525
526 It is required for systems with ancient MFM/RLL/ESDI drives.
527
528 If unsure, say N.
529
530 config BLK_DEV_RBD
531 tristate "Rados block device (RBD)"
532 depends on INET && EXPERIMENTAL && BLOCK
533 select CEPH_LIB
534 select LIBCRC32C
535 select CRYPTO_AES
536 select CRYPTO
537 default n
538 help
539 Say Y here if you want to include the Rados block device, which stripes
540 a block device over objects stored in the Ceph distributed object
541 store.
542
543 More information at http://ceph.newdream.net/.
544
545 If unsure, say N.
546
547 endif # BLK_DEV
548
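The BLK_DEV_LOOP_MIN_COUNT help text above mentions that a recent losetup(8) can obtain loop devices on demand through /dev/loop-control. A minimal userspace sketch of that interface, assuming a kernel with the loop-control node enabled (error handling trimmed, illustrative only):

#include <fcntl.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <linux/loop.h>

int main(void)
{
	/* Ask the control node for a free loop device; the kernel creates one if needed. */
	int ctl = open("/dev/loop-control", O_RDWR);
	int nr = ioctl(ctl, LOOP_CTL_GET_FREE);

	if (nr >= 0)
		printf("got /dev/loop%d\n", nr);
	return nr < 0;
}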
drivers/block/Makefile
1 #
2 # Makefile for the kernel block device drivers.
3 #
4 # 12 June 2000, Christoph Hellwig <hch@infradead.org>
5 # Rewritten to use lists instead of if-statements.
6 #
7
8 obj-$(CONFIG_MAC_FLOPPY) += swim3.o
9 obj-$(CONFIG_BLK_DEV_SWIM) += swim_mod.o
10 obj-$(CONFIG_BLK_DEV_FD) += floppy.o
11 obj-$(CONFIG_AMIGA_FLOPPY) += amiflop.o
12 obj-$(CONFIG_PS3_DISK) += ps3disk.o
13 obj-$(CONFIG_PS3_VRAM) += ps3vram.o
14 obj-$(CONFIG_ATARI_FLOPPY) += ataflop.o
15 obj-$(CONFIG_AMIGA_Z2RAM) += z2ram.o
16 obj-$(CONFIG_BLK_DEV_RAM) += brd.o
17 obj-$(CONFIG_BLK_DEV_LOOP) += loop.o
18 obj-$(CONFIG_BLK_DEV_XD) += xd.o
19 obj-$(CONFIG_BLK_CPQ_DA) += cpqarray.o
20 obj-$(CONFIG_BLK_CPQ_CISS_DA) += cciss.o
21 obj-$(CONFIG_BLK_DEV_DAC960) += DAC960.o
22 obj-$(CONFIG_XILINX_SYSACE) += xsysace.o
23 obj-$(CONFIG_CDROM_PKTCDVD) += pktcdvd.o
24 obj-$(CONFIG_MG_DISK) += mg_disk.o
25 obj-$(CONFIG_SUNVDC) += sunvdc.o
26 obj-$(CONFIG_BLK_DEV_OSD) += osdblk.o
27
28 obj-$(CONFIG_BLK_DEV_UMEM) += umem.o
29 obj-$(CONFIG_BLK_DEV_NBD) += nbd.o
30 obj-$(CONFIG_BLK_DEV_CRYPTOLOOP) += cryptoloop.o
31 obj-$(CONFIG_VIRTIO_BLK) += virtio_blk.o
32
33 obj-$(CONFIG_VIODASD) += viodasd.o
34 obj-$(CONFIG_BLK_DEV_SX8) += sx8.o
35 obj-$(CONFIG_BLK_DEV_UB) += ub.o
36 obj-$(CONFIG_BLK_DEV_HD) += hd.o
37
38 obj-$(CONFIG_XEN_BLKDEV_FRONTEND) += xen-blkfront.o
39 obj-$(CONFIG_XEN_BLKDEV_BACKEND) += xen-blkback/
40 obj-$(CONFIG_BLK_DEV_DRBD) += drbd/
41 obj-$(CONFIG_BLK_DEV_RBD) += rbd.o
42 obj-$(CONFIG_BLK_DEV_PCIESSD_MTIP32XX) += mtip32xx/
43
44 swim_mod-y := swim.o swim_asm.o
45
drivers/block/mtip32xx/Kconfig
File was created 1 #
2 # mtip32xx device driver configuration
3 #
4
5 config BLK_DEV_PCIESSD_MTIP32XX
6 tristate "Block Device Driver for Micron PCIe SSDs"
7 depends on HOTPLUG_PCI_PCIE
8 help
9 This enables the block driver for Micron PCIe SSDs.
10
drivers/block/mtip32xx/Makefile
File was created 1 #
2 # Makefile for Block device driver for Micron PCIe SSD
3 #
4
5 obj-$(CONFIG_BLK_DEV_PCIESSD_MTIP32XX) += mtip32xx.o
6
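The driver source that follows is truncated in this view before it reaches its PCI registration glue. For orientation only, here is a generic sketch of how a PCIe block driver of this kind hooks into the PCI core; all names and IDs below are hypothetical placeholders, not the mtip32xx code:

#include <linux/module.h>
#include <linux/pci.h>

static const struct pci_device_id example_pci_ids[] = {
	{ PCI_DEVICE(0x1344, 0x5150) },		/* illustrative vendor/device IDs */
	{ 0 }
};
MODULE_DEVICE_TABLE(pci, example_pci_ids);

static int example_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
{
	/* A real driver would also set the DMA mask, map BARs and register a gendisk. */
	return pci_enable_device(pdev);
}

static void example_remove(struct pci_dev *pdev)
{
	pci_disable_device(pdev);
}

static struct pci_driver example_pci_driver = {
	.name		= "example_pcie_ssd",
	.id_table	= example_pci_ids,
	.probe		= example_probe,
	.remove		= example_remove,
};

static int __init example_init(void)
{
	return pci_register_driver(&example_pci_driver);
}

static void __exit example_exit(void)
{
	pci_unregister_driver(&example_pci_driver);
}

module_init(example_init);
module_exit(example_exit);
MODULE_LICENSE("GPL");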
drivers/block/mtip32xx/mtip32xx.c
File was created 1 /*
2 * Driver for the Micron P320 SSD
3 * Copyright (C) 2011 Micron Technology, Inc.
4 *
5 * Portions of this code were derived from works subjected to the
6 * following copyright:
7 * Copyright (C) 2009 Integrated Device Technology, Inc.
8 *
9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License as published by
11 * the Free Software Foundation; either version 2 of the License, or
12 * (at your option) any later version.
13 *
14 * This program is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 * GNU General Public License for more details.
18 *
19 */
20
21 #include <linux/pci.h>
22 #include <linux/interrupt.h>
23 #include <linux/ata.h>
24 #include <linux/delay.h>
25 #include <linux/hdreg.h>
26 #include <linux/uaccess.h>
27 #include <linux/random.h>
28 #include <linux/smp.h>
29 #include <linux/compat.h>
30 #include <linux/fs.h>
31 #include <linux/module.h>
32 #include <linux/genhd.h>
33 #include <linux/blkdev.h>
34 #include <linux/bio.h>
35 #include <linux/dma-mapping.h>
36 #include <linux/idr.h>
37 #include <linux/kthread.h>
38 #include <../drivers/ata/ahci.h>
39 #include "mtip32xx.h"
40
41 #define HW_CMD_SLOT_SZ (MTIP_MAX_COMMAND_SLOTS * 32)
42 #define HW_CMD_TBL_SZ (AHCI_CMD_TBL_HDR_SZ + (MTIP_MAX_SG * 16))
43 #define HW_CMD_TBL_AR_SZ (HW_CMD_TBL_SZ * MTIP_MAX_COMMAND_SLOTS)
44 #define HW_PORT_PRIV_DMA_SZ \
45 (HW_CMD_SLOT_SZ + HW_CMD_TBL_AR_SZ + AHCI_RX_FIS_SZ)
46
47 #define HOST_HSORG 0xFC
48 #define HSORG_DISABLE_SLOTGRP_INTR (1<<24)
49 #define HSORG_DISABLE_SLOTGRP_PXIS (1<<16)
50 #define HSORG_HWREV 0xFF00
51 #define HSORG_STYLE 0x8
52 #define HSORG_SLOTGROUPS 0x7
53
54 #define PORT_COMMAND_ISSUE 0x38
55 #define PORT_SDBV 0x7C
56
57 #define PORT_OFFSET 0x100
58 #define PORT_MEM_SIZE 0x80
59
60 #define PORT_IRQ_ERR \
61 (PORT_IRQ_HBUS_ERR | PORT_IRQ_IF_ERR | PORT_IRQ_CONNECT | \
62 PORT_IRQ_PHYRDY | PORT_IRQ_UNK_FIS | PORT_IRQ_BAD_PMP | \
63 PORT_IRQ_TF_ERR | PORT_IRQ_HBUS_DATA_ERR | PORT_IRQ_IF_NONFATAL | \
64 PORT_IRQ_OVERFLOW)
65 #define PORT_IRQ_LEGACY \
66 (PORT_IRQ_PIOS_FIS | PORT_IRQ_D2H_REG_FIS)
67 #define PORT_IRQ_HANDLED \
68 (PORT_IRQ_SDB_FIS | PORT_IRQ_LEGACY | \
69 PORT_IRQ_TF_ERR | PORT_IRQ_IF_ERR | \
70 PORT_IRQ_CONNECT | PORT_IRQ_PHYRDY)
71 #define DEF_PORT_IRQ \
72 (PORT_IRQ_ERR | PORT_IRQ_LEGACY | PORT_IRQ_SDB_FIS)
73
74 /* product numbers */
75 #define MTIP_PRODUCT_UNKNOWN 0x00
76 #define MTIP_PRODUCT_ASICFPGA 0x11
77
78 /* Device instance number, incremented each time a device is probed. */
79 static int instance;
80
81 /*
82 * Global variable used to hold the major block device number
83 * allocated in mtip_init().
84 */
85 static int mtip_major;
86
87 static DEFINE_SPINLOCK(rssd_index_lock);
88 static DEFINE_IDA(rssd_index_ida);
89
90 static int mtip_block_initialize(struct driver_data *dd);
91
92 #ifdef CONFIG_COMPAT
93 struct mtip_compat_ide_task_request_s {
94 __u8 io_ports[8];
95 __u8 hob_ports[8];
96 ide_reg_valid_t out_flags;
97 ide_reg_valid_t in_flags;
98 int data_phase;
99 int req_cmd;
100 compat_ulong_t out_size;
101 compat_ulong_t in_size;
102 };
103 #endif
104
105 /*
106 * This function is called when the card may have been removed
107 * from the system; it reads the vendor ID from the PCI
108 * configuration space to detect a surprise removal.
109 *
110 * @pdev Pointer to the pci_dev structure.
111 *
112 * return value
113 * true if device removed, else false
114 */
115 static bool mtip_check_surprise_removal(struct pci_dev *pdev)
116 {
117 u16 vendor_id = 0;
118
119 /* Read the vendorID from the configuration space */
120 pci_read_config_word(pdev, 0x00, &vendor_id);
121 if (vendor_id == 0xFFFF)
122 return true; /* device removed */
123
124 return false; /* device present */
125 }
126
127 /*
128 * This function is called to clean up the commands pending in the
129 * command slots when the device is surprise-removed, returning an
130 * error to the upper layer for each outstanding command.
131 *
132 * @dd Pointer to the DRIVER_DATA structure.
133 *
134 * return value
135 * None
136 */
137 static void mtip_command_cleanup(struct driver_data *dd)
138 {
139 int group = 0, commandslot = 0, commandindex = 0;
140 struct mtip_cmd *command;
141 struct mtip_port *port = dd->port;
142
143 for (group = 0; group < 4; group++) {
144 for (commandslot = 0; commandslot < 32; commandslot++) {
145 if (!(port->allocated[group] & (1 << commandslot)))
146 continue;
147
148 commandindex = group << 5 | commandslot;
149 command = &port->commands[commandindex];
150
151 if (atomic_read(&command->active)
152 && (command->async_callback)) {
153 command->async_callback(command->async_data,
154 -ENODEV);
155 command->async_callback = NULL;
156 command->async_data = NULL;
157 }
158
159 dma_unmap_sg(&port->dd->pdev->dev,
160 command->sg,
161 command->scatter_ents,
162 command->direction);
163 }
164 }
165
166 up(&port->cmd_slot);
167
168 atomic_set(&dd->drv_cleanup_done, true);
169 }
170
171 /*
172 * Obtain an empty command slot.
173 *
174 * This function needs to be reentrant since it could be called
175 * at the same time on multiple CPUs. The allocation of the
176 * command slot must be atomic.
177 *
178 * @port Pointer to the port data structure.
179 *
180 * return value
181 * >= 0 Index of command slot obtained.
182 * -1 No command slots available.
183 */
184 static int get_slot(struct mtip_port *port)
185 {
186 int slot, i;
187 unsigned int num_command_slots = port->dd->slot_groups * 32;
188
189 /*
190 * Try 10 times, because there is a small race here:
191 * retrying is still cheaper than taking a lock.
192 *
193 * Race: since this section is not protected by a lock, the same
194 * bit could be chosen by different process contexts running on
195 * different processors. Instead of a costly lock, we go with a
196 * retry loop.
197 */
198 for (i = 0; i < 10; i++) {
199 slot = find_next_zero_bit(port->allocated,
200 num_command_slots, 1);
201 if ((slot < num_command_slots) &&
202 (!test_and_set_bit(slot, port->allocated)))
203 return slot;
204 }
205 dev_warn(&port->dd->pdev->dev, "Failed to get a tag.\n");
206
207 if (mtip_check_surprise_removal(port->dd->pdev)) {
208 /* Device not present, clean outstanding commands */
209 mtip_command_cleanup(port->dd);
210 }
211 return -1;
212 }
213
214 /*
215 * Release a command slot.
216 *
217 * @port Pointer to the port data structure.
218 * @tag Tag of command to release
219 *
220 * return value
221 * None
222 */
223 static inline void release_slot(struct mtip_port *port, int tag)
224 {
225 smp_mb__before_clear_bit();
226 clear_bit(tag, port->allocated);
227 smp_mb__after_clear_bit();
228 }
229
230 /*
231 * Reset the HBA (without sleeping)
232 *
233 * Just like hba_reset, except that it does not sleep, so it can
234 * be run from interrupt/tasklet context.
235 *
236 * @dd Pointer to the driver data structure.
237 *
238 * return value
239 * 0 The reset was successful.
240 * -1 The HBA Reset bit did not clear.
241 */
242 static int hba_reset_nosleep(struct driver_data *dd)
243 {
244 unsigned long timeout;
245
246 /* Chip quirk: quiesce any chip function */
247 mdelay(10);
248
249 /* Set the reset bit */
250 writel(HOST_RESET, dd->mmio + HOST_CTL);
251
252 /* Flush */
253 readl(dd->mmio + HOST_CTL);
254
255 /*
256 * Wait 10ms then spin for up to 1 second
257 * waiting for reset acknowledgement
258 */
259 timeout = jiffies + msecs_to_jiffies(1000);
260 mdelay(10);
261 while ((readl(dd->mmio + HOST_CTL) & HOST_RESET)
262 && time_before(jiffies, timeout))
263 mdelay(1);
264
265 if (readl(dd->mmio + HOST_CTL) & HOST_RESET)
266 return -1;
267
268 return 0;
269 }
270
271 /*
272 * Issue a command to the hardware.
273 *
274 * Set the appropriate bit in the s_active and Command Issue hardware
275 * registers, causing hardware command processing to begin.
276 *
277 * @port Pointer to the port structure.
278 * @tag The tag of the command to be issued.
279 *
280 * return value
281 * None
282 */
283 static inline void mtip_issue_ncq_command(struct mtip_port *port, int tag)
284 {
285 unsigned long flags = 0;
286
287 atomic_set(&port->commands[tag].active, 1);
288
289 spin_lock_irqsave(&port->cmd_issue_lock, flags);
290
291 writel((1 << MTIP_TAG_BIT(tag)),
292 port->s_active[MTIP_TAG_INDEX(tag)]);
293 writel((1 << MTIP_TAG_BIT(tag)),
294 port->cmd_issue[MTIP_TAG_INDEX(tag)]);
295
296 spin_unlock_irqrestore(&port->cmd_issue_lock, flags);
297 }
298
299 /*
300 * Enable/disable the reception of FIS
301 *
302 * @port Pointer to the port data structure
303 * @enable 1 to enable, 0 to disable
304 *
305 * return value
306 * Previous state: 1 enabled, 0 disabled
307 */
308 static int mtip_enable_fis(struct mtip_port *port, int enable)
309 {
310 u32 tmp;
311
312 /* enable FIS reception */
313 tmp = readl(port->mmio + PORT_CMD);
314 if (enable)
315 writel(tmp | PORT_CMD_FIS_RX, port->mmio + PORT_CMD);
316 else
317 writel(tmp & ~PORT_CMD_FIS_RX, port->mmio + PORT_CMD);
318
319 /* Flush */
320 readl(port->mmio + PORT_CMD);
321
322 return (((tmp & PORT_CMD_FIS_RX) == PORT_CMD_FIS_RX));
323 }
324
325 /*
326 * Enable/disable the DMA engine
327 *
328 * @port Pointer to the port data structure
329 * @enable 1 to enable, 0 to disable
330 *
331 * return value
332 * Previous state: 1 enabled, 0 disabled.
333 */
334 static int mtip_enable_engine(struct mtip_port *port, int enable)
335 {
336 u32 tmp;
337
338 /* enable the DMA engine */
339 tmp = readl(port->mmio + PORT_CMD);
340 if (enable)
341 writel(tmp | PORT_CMD_START, port->mmio + PORT_CMD);
342 else
343 writel(tmp & ~PORT_CMD_START, port->mmio + PORT_CMD);
344
345 readl(port->mmio + PORT_CMD);
346 return (((tmp & PORT_CMD_START) == PORT_CMD_START));
347 }
348
349 /*
350 * Enables the port DMA engine and FIS reception.
351 *
352 * return value
353 * None
354 */
355 static inline void mtip_start_port(struct mtip_port *port)
356 {
357 /* Enable FIS reception */
358 mtip_enable_fis(port, 1);
359
360 /* Enable the DMA engine */
361 mtip_enable_engine(port, 1);
362 }
363
364 /*
365 * Deinitialize a port by disabling port interrupts, the DMA engine,
366 * and FIS reception.
367 *
368 * @port Pointer to the port structure
369 *
370 * return value
371 * None
372 */
373 static inline void mtip_deinit_port(struct mtip_port *port)
374 {
375 /* Disable interrupts on this port */
376 writel(0, port->mmio + PORT_IRQ_MASK);
377
378 /* Disable the DMA engine */
379 mtip_enable_engine(port, 0);
380
381 /* Disable FIS reception */
382 mtip_enable_fis(port, 0);
383 }
384
385 /*
386 * Initialize a port.
387 *
388 * This function deinitializes the port by calling mtip_deinit_port() and
389 * then initializes it by setting the command header and RX FIS addresses,
390 * clearing the SError register and any pending port interrupts before
391 * re-enabling the default set of port interrupts.
392 *
393 * @port Pointer to the port structure.
394 *
395 * return value
396 * None
397 */
398 static void mtip_init_port(struct mtip_port *port)
399 {
400 int i;
401 mtip_deinit_port(port);
402
403 /* Program the command list base and FIS base addresses */
404 if (readl(port->dd->mmio + HOST_CAP) & HOST_CAP_64) {
405 writel((port->command_list_dma >> 16) >> 16,
406 port->mmio + PORT_LST_ADDR_HI);
407 writel((port->rxfis_dma >> 16) >> 16,
408 port->mmio + PORT_FIS_ADDR_HI);
409 }
410
411 writel(port->command_list_dma & 0xFFFFFFFF,
412 port->mmio + PORT_LST_ADDR);
413 writel(port->rxfis_dma & 0xFFFFFFFF, port->mmio + PORT_FIS_ADDR);
414
415 /* Clear SError */
416 writel(readl(port->mmio + PORT_SCR_ERR), port->mmio + PORT_SCR_ERR);
417
418 /* reset the completed registers.*/
419 for (i = 0; i < port->dd->slot_groups; i++)
420 writel(0xFFFFFFFF, port->completed[i]);
421
422 /* Clear any pending interrupts for this port */
423 writel(readl(port->mmio + PORT_IRQ_STAT), port->mmio + PORT_IRQ_STAT);
424
425 /* Enable port interrupts */
426 writel(DEF_PORT_IRQ, port->mmio + PORT_IRQ_MASK);
427 }
428
429 /*
430 * Restart a port
431 *
432 * @port Pointer to the port data structure.
433 *
434 * return value
435 * None
436 */
437 static void mtip_restart_port(struct mtip_port *port)
438 {
439 unsigned long timeout;
440
441 /* Disable the DMA engine */
442 mtip_enable_engine(port, 0);
443
444 /* Chip quirk: wait up to 500ms for PxCMD.CR == 0 */
445 timeout = jiffies + msecs_to_jiffies(500);
446 while ((readl(port->mmio + PORT_CMD) & PORT_CMD_LIST_ON)
447 && time_before(jiffies, timeout))
448 ;
449
450 /*
451 * Chip quirk: escalate to hba reset if
452 * PxCMD.CR not clear after 500 ms
453 */
454 if (readl(port->mmio + PORT_CMD) & PORT_CMD_LIST_ON) {
455 dev_warn(&port->dd->pdev->dev,
456 "PxCMD.CR not clear, escalating reset\n");
457
458 if (hba_reset_nosleep(port->dd))
459 dev_err(&port->dd->pdev->dev,
460 "HBA reset escalation failed.\n");
461
462 /* 30 ms delay before com reset to quiesce chip */
463 mdelay(30);
464 }
465
466 dev_warn(&port->dd->pdev->dev, "Issuing COM reset\n");
467
468 /* Set PxSCTL.DET */
469 writel(readl(port->mmio + PORT_SCR_CTL) |
470 1, port->mmio + PORT_SCR_CTL);
471 readl(port->mmio + PORT_SCR_CTL);
472
473 /* Wait 1 ms to quiesce chip function */
474 timeout = jiffies + msecs_to_jiffies(1);
475 while (time_before(jiffies, timeout))
476 ;
477
478 /* Clear PxSCTL.DET */
479 writel(readl(port->mmio + PORT_SCR_CTL) & ~1,
480 port->mmio + PORT_SCR_CTL);
481 readl(port->mmio + PORT_SCR_CTL);
482
483 	/* Wait 500 ms for bit 0 of PORT_SCR_STAT to be set */
484 timeout = jiffies + msecs_to_jiffies(500);
485 while (((readl(port->mmio + PORT_SCR_STAT) & 0x01) == 0)
486 && time_before(jiffies, timeout))
487 ;
488
489 if ((readl(port->mmio + PORT_SCR_STAT) & 0x01) == 0)
490 dev_warn(&port->dd->pdev->dev,
491 "COM reset failed\n");
492
493 /* Clear SError, the PxSERR.DIAG.x should be set so clear it */
494 writel(readl(port->mmio + PORT_SCR_ERR), port->mmio + PORT_SCR_ERR);
495
496 /* Enable the DMA engine */
497 mtip_enable_engine(port, 1);
498 }
499
500 /*
501 * Called periodically to see if any read/write commands are
502 * taking too long to complete.
503 *
504 * @data Pointer to the PORT data structure.
505 *
506 * return value
507 * None
508 */
509 static void mtip_timeout_function(unsigned long int data)
510 {
511 struct mtip_port *port = (struct mtip_port *) data;
512 struct host_to_dev_fis *fis;
513 struct mtip_cmd *command;
514 	int tag, cmdto_cnt = 0;
515 	unsigned int bit, group;
516 	unsigned int num_command_slots;
517 
518 	if (unlikely(!port))
519 		return;
520 
	/* only dereference port once the NULL check above has passed */
	num_command_slots = port->dd->slot_groups * 32;

521 if (atomic_read(&port->dd->resumeflag) == true) {
522 mod_timer(&port->cmd_timer,
523 jiffies + msecs_to_jiffies(30000));
524 return;
525 }
526
527 for (tag = 0; tag < num_command_slots; tag++) {
528 /*
529 * Skip internal command slot as it has
530 * its own timeout mechanism
531 */
532 if (tag == MTIP_TAG_INTERNAL)
533 continue;
534
535 if (atomic_read(&port->commands[tag].active) &&
536 (time_after(jiffies, port->commands[tag].comp_time))) {
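			/* slot groups are 32 tags wide: group = tag / 32, bit = tag % 32 */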
537 group = tag >> 5;
538 bit = tag & 0x1F;
539
540 command = &port->commands[tag];
541 fis = (struct host_to_dev_fis *) command->command;
542
543 dev_warn(&port->dd->pdev->dev,
544 "Timeout for command tag %d\n", tag);
545
546 cmdto_cnt++;
547 if (cmdto_cnt == 1)
548 set_bit(MTIP_FLAG_EH_ACTIVE_BIT, &port->flags);
549
550 /*
551 * Clear the completed bit. This should prevent
552 * any interrupt handlers from trying to retire
553 * the command.
554 */
555 writel(1 << bit, port->completed[group]);
556
557 /* Call the async completion callback. */
558 if (likely(command->async_callback))
559 command->async_callback(command->async_data,
560 -EIO);
561 command->async_callback = NULL;
562 command->comp_func = NULL;
563
564 /* Unmap the DMA scatter list entries */
565 dma_unmap_sg(&port->dd->pdev->dev,
566 command->sg,
567 command->scatter_ents,
568 command->direction);
569
570 /*
571 * Clear the allocated bit and active tag for the
572 * command.
573 */
574 atomic_set(&port->commands[tag].active, 0);
575 release_slot(port, tag);
576
577 up(&port->cmd_slot);
578 }
579 }
580
581 if (cmdto_cnt) {
582 dev_warn(&port->dd->pdev->dev,
583 "%d commands timed out: restarting port",
584 cmdto_cnt);
585 mtip_restart_port(port);
586 clear_bit(MTIP_FLAG_EH_ACTIVE_BIT, &port->flags);
587 wake_up_interruptible(&port->svc_wait);
588 }
589
590 /* Restart the timer */
591 mod_timer(&port->cmd_timer,
592 jiffies + msecs_to_jiffies(MTIP_TIMEOUT_CHECK_PERIOD));
593 }
594
595 /*
596 * IO completion function.
597 *
598 * This completion function is called by the driver ISR when a
599 * command that was issued by the kernel completes. It first calls the
600 * asynchronous completion function which normally calls back into the block
601 * layer passing the asynchronous callback data, then unmaps the
602 * scatter list associated with the completed command, and finally
603 * clears the allocated bit associated with the completed command.
604 *
605 * @port Pointer to the port data structure.
606 * @tag Tag of the command.
607 * @data Pointer to driver_data.
608 * @status Completion status.
609 *
610 * return value
611 * None
612 */
613 static void mtip_async_complete(struct mtip_port *port,
614 int tag,
615 void *data,
616 int status)
617 {
618 struct mtip_cmd *command;
619 struct driver_data *dd = data;
620 int cb_status = status ? -EIO : 0;
621
622 if (unlikely(!dd) || unlikely(!port))
623 return;
624
625 command = &port->commands[tag];
626
627 if (unlikely(status == PORT_IRQ_TF_ERR)) {
628 dev_warn(&port->dd->pdev->dev,
629 "Command tag %d failed due to TFE\n", tag);
630 }
631
632 /* Upper layer callback */
633 if (likely(command->async_callback))
634 command->async_callback(command->async_data, cb_status);
635
636 command->async_callback = NULL;
637 command->comp_func = NULL;
638
639 /* Unmap the DMA scatter list entries */
640 dma_unmap_sg(&dd->pdev->dev,
641 command->sg,
642 command->scatter_ents,
643 command->direction);
644
645 /* Clear the allocated and active bits for the command */
646 atomic_set(&port->commands[tag].active, 0);
647 release_slot(port, tag);
648
649 up(&port->cmd_slot);
650 }
651
652 /*
653 * Internal command completion callback function.
654 *
655 * This function is normally called by the driver ISR when an internal
656 * command completed. This function signals the command completion by
657 * calling complete().
658 *
659 * @port Pointer to the port data structure.
660 * @tag Tag of the command that has completed.
661 * @data Pointer to a completion structure.
662 * @status Completion status.
663 *
664 * return value
665 * None
666 */
667 static void mtip_completion(struct mtip_port *port,
668 int tag,
669 void *data,
670 int status)
671 {
672 struct mtip_cmd *command = &port->commands[tag];
673 struct completion *waiting = data;
674 if (unlikely(status == PORT_IRQ_TF_ERR))
675 dev_warn(&port->dd->pdev->dev,
676 "Internal command %d completed with TFE\n", tag);
677
678 command->async_callback = NULL;
679 command->comp_func = NULL;
680
681 complete(waiting);
682 }
683
684 /*
685 * Helper function for tag logging
686 */
687 static void print_tags(struct driver_data *dd,
688 char *msg,
689 unsigned long *tagbits)
690 {
691 unsigned int tag, count = 0;
692
693 for (tag = 0; tag < (dd->slot_groups) * 32; tag++) {
694 if (test_bit(tag, tagbits))
695 count++;
696 }
697 if (count)
698 dev_info(&dd->pdev->dev, "%s [%i tags]\n", msg, count);
699 }
700
701 /*
702 * Handle an error.
703 *
704 * @dd Pointer to the DRIVER_DATA structure.
705 *
706 * return value
707 * None
708 */
709 static void mtip_handle_tfe(struct driver_data *dd)
710 {
711 int group, tag, bit, reissue;
712 struct mtip_port *port;
713 struct mtip_cmd *command;
714 u32 completed;
715 struct host_to_dev_fis *fis;
716 unsigned long tagaccum[SLOTBITS_IN_LONGS];
717
718 dev_warn(&dd->pdev->dev, "Taskfile error\n");
719
720 port = dd->port;
721
722 /* Stop the timer to prevent command timeouts. */
723 del_timer(&port->cmd_timer);
724
725 /* Set eh_active */
726 set_bit(MTIP_FLAG_EH_ACTIVE_BIT, &port->flags);
727
728 /* Loop through all the groups */
729 for (group = 0; group < dd->slot_groups; group++) {
730 completed = readl(port->completed[group]);
731
732 /* clear completed status register in the hardware.*/
733 writel(completed, port->completed[group]);
734
735 /* clear the tag accumulator */
736 memset(tagaccum, 0, SLOTBITS_IN_LONGS * sizeof(long));
737
738 /* Process successfully completed commands */
739 for (bit = 0; bit < 32 && completed; bit++) {
740 if (!(completed & (1<<bit)))
741 continue;
742 tag = (group << 5) + bit;
743
744 /* Skip the internal command slot */
745 if (tag == MTIP_TAG_INTERNAL)
746 continue;
747
748 command = &port->commands[tag];
749 if (likely(command->comp_func)) {
750 set_bit(tag, tagaccum);
751 atomic_set(&port->commands[tag].active, 0);
752 command->comp_func(port,
753 tag,
754 command->comp_data,
755 0);
756 } else {
757 dev_err(&port->dd->pdev->dev,
758 "Missing completion func for tag %d",
759 tag);
760 if (mtip_check_surprise_removal(dd->pdev)) {
761 mtip_command_cleanup(dd);
762 /* don't proceed further */
763 return;
764 }
765 }
766 }
767 }
768 print_tags(dd, "TFE tags completed:", tagaccum);
769
770 /* Restart the port */
771 mdelay(20);
772 mtip_restart_port(port);
773
774 /* clear the tag accumulator */
775 memset(tagaccum, 0, SLOTBITS_IN_LONGS * sizeof(long));
776
777 /* Loop through all the groups */
778 for (group = 0; group < dd->slot_groups; group++) {
779 for (bit = 0; bit < 32; bit++) {
780 reissue = 1;
781 tag = (group << 5) + bit;
782
783 /* If the active bit is set re-issue the command */
784 if (atomic_read(&port->commands[tag].active) == 0)
785 continue;
786
787 fis = (struct host_to_dev_fis *)
788 port->commands[tag].command;
789
790 /* Should re-issue? */
791 if (tag == MTIP_TAG_INTERNAL ||
792 fis->command == ATA_CMD_SET_FEATURES)
793 reissue = 0;
794
795 /*
796 * First check if this command has
797 * exceeded its retries.
798 */
799 if (reissue &&
800 (port->commands[tag].retries-- > 0)) {
801
802 set_bit(tag, tagaccum);
803
804 /* Update the timeout value. */
805 port->commands[tag].comp_time =
806 jiffies + msecs_to_jiffies(
807 MTIP_NCQ_COMMAND_TIMEOUT_MS);
808 /* Re-issue the command. */
809 mtip_issue_ncq_command(port, tag);
810
811 continue;
812 }
813
814 /* Retire a command that will not be reissued */
815 dev_warn(&port->dd->pdev->dev,
816 "retiring tag %d\n", tag);
817 atomic_set(&port->commands[tag].active, 0);
818
819 if (port->commands[tag].comp_func)
820 port->commands[tag].comp_func(
821 port,
822 tag,
823 port->commands[tag].comp_data,
824 PORT_IRQ_TF_ERR);
825 else
826 dev_warn(&port->dd->pdev->dev,
827 "Bad completion for tag %d\n",
828 tag);
829 }
830 }
831 print_tags(dd, "TFE tags reissued:", tagaccum);
832
833 /* clear eh_active */
834 clear_bit(MTIP_FLAG_EH_ACTIVE_BIT, &port->flags);
835 wake_up_interruptible(&port->svc_wait);
836
837 mod_timer(&port->cmd_timer,
838 jiffies + msecs_to_jiffies(MTIP_TIMEOUT_CHECK_PERIOD));
839 }
840
841 /*
842 * Handle a set device bits interrupt
843 */
844 static inline void mtip_process_sdbf(struct driver_data *dd)
845 {
846 struct mtip_port *port = dd->port;
847 int group, tag, bit;
848 u32 completed;
849 struct mtip_cmd *command;
850
851 /* walk all bits in all slot groups */
852 for (group = 0; group < dd->slot_groups; group++) {
853 completed = readl(port->completed[group]);
854
855 /* clear completed status register in the hardware.*/
856 writel(completed, port->completed[group]);
857
858 /* Process completed commands. */
859 for (bit = 0;
860 (bit < 32) && completed;
861 bit++, completed >>= 1) {
862 if (completed & 0x01) {
863 tag = (group << 5) | bit;
864
865 /* skip internal command slot. */
866 if (unlikely(tag == MTIP_TAG_INTERNAL))
867 continue;
868
869 command = &port->commands[tag];
870 /* make internal callback */
871 if (likely(command->comp_func)) {
872 command->comp_func(
873 port,
874 tag,
875 command->comp_data,
876 0);
877 } else {
878 dev_warn(&dd->pdev->dev,
879 "Null completion "
880 "for tag %d",
881 tag);
882
883 if (mtip_check_surprise_removal(
884 dd->pdev)) {
885 mtip_command_cleanup(dd);
886 return;
887 }
888 }
889 }
890 }
891 }
892 }
893
894 /*
895 * Process legacy pio and d2h interrupts
896 */
897 static inline void mtip_process_legacy(struct driver_data *dd, u32 port_stat)
898 {
899 struct mtip_port *port = dd->port;
900 struct mtip_cmd *cmd = &port->commands[MTIP_TAG_INTERNAL];
901
902 if (test_bit(MTIP_FLAG_IC_ACTIVE_BIT, &port->flags) &&
903 (cmd != NULL) && !(readl(port->cmd_issue[MTIP_TAG_INTERNAL])
904 & (1 << MTIP_TAG_INTERNAL))) {
905 if (cmd->comp_func) {
906 cmd->comp_func(port,
907 MTIP_TAG_INTERNAL,
908 cmd->comp_data,
909 0);
910 return;
911 }
912 }
913
914 dev_warn(&dd->pdev->dev, "IRQ status 0x%x ignored.\n", port_stat);
915
916 return;
917 }
918
919 /*
920 * Demux and handle errors
921 */
922 static inline void mtip_process_errors(struct driver_data *dd, u32 port_stat)
923 {
924 if (likely(port_stat & (PORT_IRQ_TF_ERR | PORT_IRQ_IF_ERR)))
925 mtip_handle_tfe(dd);
926
927 if (unlikely(port_stat & PORT_IRQ_CONNECT)) {
928 dev_warn(&dd->pdev->dev,
929 "Clearing PxSERR.DIAG.x\n");
930 writel((1 << 26), dd->port->mmio + PORT_SCR_ERR);
931 }
932
933 if (unlikely(port_stat & PORT_IRQ_PHYRDY)) {
934 dev_warn(&dd->pdev->dev,
935 "Clearing PxSERR.DIAG.n\n");
936 writel((1 << 16), dd->port->mmio + PORT_SCR_ERR);
937 }
938
939 if (unlikely(port_stat & ~PORT_IRQ_HANDLED)) {
940 dev_warn(&dd->pdev->dev,
941 "Port stat errors %x unhandled\n",
942 (port_stat & ~PORT_IRQ_HANDLED));
943 }
944 }
945
946 static inline irqreturn_t mtip_handle_irq(struct driver_data *data)
947 {
948 struct driver_data *dd = (struct driver_data *) data;
949 struct mtip_port *port = dd->port;
950 u32 hba_stat, port_stat;
951 int rv = IRQ_NONE;
952
953 hba_stat = readl(dd->mmio + HOST_IRQ_STAT);
954 if (hba_stat) {
955 rv = IRQ_HANDLED;
956
957 /* Acknowledge the interrupt status on the port.*/
958 port_stat = readl(port->mmio + PORT_IRQ_STAT);
959 writel(port_stat, port->mmio + PORT_IRQ_STAT);
960
961 /* Demux port status */
962 if (likely(port_stat & PORT_IRQ_SDB_FIS))
963 mtip_process_sdbf(dd);
964
965 if (unlikely(port_stat & PORT_IRQ_ERR)) {
966 if (unlikely(mtip_check_surprise_removal(dd->pdev))) {
967 mtip_command_cleanup(dd);
968 /* don't proceed further */
969 return IRQ_HANDLED;
970 }
971
972 mtip_process_errors(dd, port_stat & PORT_IRQ_ERR);
973 }
974
975 if (unlikely(port_stat & PORT_IRQ_LEGACY))
976 mtip_process_legacy(dd, port_stat & PORT_IRQ_LEGACY);
977 }
978
979 /* acknowledge interrupt */
980 writel(hba_stat, dd->mmio + HOST_IRQ_STAT);
981
982 return rv;
983 }
984
985 /*
986 * Wrapper for mtip_handle_irq
987 * (ignores return code)
988 */
989 static void mtip_tasklet(unsigned long data)
990 {
991 mtip_handle_irq((struct driver_data *) data);
992 }
993
994 /*
995 * HBA interrupt subroutine.
996 *
997 * @irq IRQ number.
998 * @instance Pointer to the driver data structure.
999 *
1000 * return value
1001 * IRQ_HANDLED An HBA interrupt was pending and handled.
1002 * IRQ_NONE This interrupt was not for the HBA.
1003 */
1004 static irqreturn_t mtip_irq_handler(int irq, void *instance)
1005 {
1006 struct driver_data *dd = instance;
1007 tasklet_schedule(&dd->tasklet);
1008 return IRQ_HANDLED;
1009 }
1010
1011 static void mtip_issue_non_ncq_command(struct mtip_port *port, int tag)
1012 {
1013 atomic_set(&port->commands[tag].active, 1);
1014 writel(1 << MTIP_TAG_BIT(tag),
1015 port->cmd_issue[MTIP_TAG_INDEX(tag)]);
1016 }
1017
1018 /*
1019 * Wait for port to quiesce
1020 *
1021 * @port Pointer to port data structure
1022 * @timeout Max duration to wait (ms)
1023 *
1024 * return value
1025 * 0 Success
1026 * -EBUSY Commands still active
1027 */
1028 static int mtip_quiesce_io(struct mtip_port *port, unsigned long timeout)
1029 {
1030 unsigned long to;
1031 unsigned int n;
1032 unsigned int active = 1;
1033
1034 to = jiffies + msecs_to_jiffies(timeout);
1035 do {
1036 if (test_bit(MTIP_FLAG_SVC_THD_ACTIVE_BIT, &port->flags) &&
1037 test_bit(MTIP_FLAG_ISSUE_CMDS_BIT, &port->flags)) {
1038 msleep(20);
1039 continue; /* svc thd is actively issuing commands */
1040 }
1041 /*
1042 * Ignore s_active bit 0 of array element 0.
1043 * This bit will always be set
1044 */
1045 active = readl(port->s_active[0]) & 0xFFFFFFFE;
1046 for (n = 1; n < port->dd->slot_groups; n++)
1047 active |= readl(port->s_active[n]);
1048
1049 if (!active)
1050 break;
1051
1052 msleep(20);
1053 } while (time_before(jiffies, to));
1054
1055 return active ? -EBUSY : 0;
1056 }
1057
1058 /*
1059 * Execute an internal command and wait for the completion.
1060 *
1061 * @port Pointer to the port data structure.
1062 * @fis Pointer to the FIS that describes the command.
1063 * @fis_len Length in WORDS of the FIS.
1064 * @buffer DMA address of the buffer that holds the command data.
1065 * @buf_len Length, in bytes, of the data buffer.
1066 * @opts Command header options, excluding the FIS length
1067 * and the number of PRD entries.
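 * @atomic GFP_KERNEL if the caller may sleep while waiting for the
 * command to complete; any other value causes the command to be polled.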
1068 * @timeout Time in ms to wait for the command to complete.
1069 *
1070 * return value
1071 * 0 Command completed successfully.
1072 * -EFAULT The buffer address is not correctly aligned.
1073 * -EBUSY Internal command or other IO in progress.
1074 * -EAGAIN Time out waiting for command to complete.
1075 */
1076 static int mtip_exec_internal_command(struct mtip_port *port,
1077 void *fis,
1078 int fis_len,
1079 dma_addr_t buffer,
1080 int buf_len,
1081 u32 opts,
1082 gfp_t atomic,
1083 unsigned long timeout)
1084 {
1085 struct mtip_cmd_sg *command_sg;
1086 DECLARE_COMPLETION_ONSTACK(wait);
1087 int rv = 0;
1088 struct mtip_cmd *int_cmd = &port->commands[MTIP_TAG_INTERNAL];
1089
1090 /* Make sure the buffer is 8 byte aligned. This is asic specific. */
1091 if (buffer & 0x00000007) {
1092 dev_err(&port->dd->pdev->dev,
1093 "SG buffer is not 8 byte aligned\n");
1094 return -EFAULT;
1095 }
1096
1097 /* Only one internal command should be running at a time */
1098 if (test_and_set_bit(MTIP_TAG_INTERNAL, port->allocated)) {
1099 dev_warn(&port->dd->pdev->dev,
1100 "Internal command already active\n");
1101 return -EBUSY;
1102 }
1103 set_bit(MTIP_FLAG_IC_ACTIVE_BIT, &port->flags);
1104
1105 if (atomic == GFP_KERNEL) {
1106 /* wait for io to complete if non atomic */
1107 if (mtip_quiesce_io(port, 5000) < 0) {
1108 dev_warn(&port->dd->pdev->dev,
1109 "Failed to quiesce IO\n");
1110 release_slot(port, MTIP_TAG_INTERNAL);
1111 clear_bit(MTIP_FLAG_IC_ACTIVE_BIT, &port->flags);
1112 wake_up_interruptible(&port->svc_wait);
1113 return -EBUSY;
1114 }
1115
1116 /* Set the completion function and data for the command. */
1117 int_cmd->comp_data = &wait;
1118 int_cmd->comp_func = mtip_completion;
1119
1120 } else {
1121 /* Clear completion - we're going to poll */
1122 int_cmd->comp_data = NULL;
1123 int_cmd->comp_func = NULL;
1124 }
1125
1126 /* Copy the command to the command table */
1127 memcpy(int_cmd->command, fis, fis_len*4);
1128
1129 /* Populate the SG list */
1130 int_cmd->command_header->opts =
1131 __force_bit2int cpu_to_le32(opts | fis_len);
1132 if (buf_len) {
1133 command_sg = int_cmd->command + AHCI_CMD_TBL_HDR_SZ;
1134
1135 command_sg->info =
1136 __force_bit2int cpu_to_le32((buf_len-1) & 0x3FFFFF);
1137 command_sg->dba =
1138 __force_bit2int cpu_to_le32(buffer & 0xFFFFFFFF);
1139 command_sg->dba_upper =
1140 __force_bit2int cpu_to_le32((buffer >> 16) >> 16);
1141
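		/* A single PRD entry: the PRDT length occupies bits 31:16 of the header options */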
1142 int_cmd->command_header->opts |=
1143 __force_bit2int cpu_to_le32((1 << 16));
1144 }
1145
1146 /* Populate the command header */
1147 int_cmd->command_header->byte_count = 0;
1148
1149 /* Issue the command to the hardware */
1150 mtip_issue_non_ncq_command(port, MTIP_TAG_INTERNAL);
1151
1152 /* Poll if atomic, wait_for_completion otherwise */
1153 if (atomic == GFP_KERNEL) {
1154 /* Wait for the command to complete or timeout. */
1155 if (wait_for_completion_timeout(
1156 &wait,
1157 msecs_to_jiffies(timeout)) == 0) {
1158 dev_err(&port->dd->pdev->dev,
1159 "Internal command did not complete [%d] "
1160 "within timeout of %lu ms\n",
1161 atomic, timeout);
1162 rv = -EAGAIN;
1163 }
1164
1165 if (readl(port->cmd_issue[MTIP_TAG_INTERNAL])
1166 & (1 << MTIP_TAG_INTERNAL)) {
1167 dev_warn(&port->dd->pdev->dev,
1168 "Retiring internal command but CI is 1.\n");
1169 }
1170
1171 } else {
1172 /* Spin for <timeout> checking if command still outstanding */
1173 timeout = jiffies + msecs_to_jiffies(timeout);
1174
1175 while ((readl(
1176 port->cmd_issue[MTIP_TAG_INTERNAL])
1177 & (1 << MTIP_TAG_INTERNAL))
1178 && time_before(jiffies, timeout))
1179 ;
1180
1181 if (readl(port->cmd_issue[MTIP_TAG_INTERNAL])
1182 & (1 << MTIP_TAG_INTERNAL)) {
1183 dev_err(&port->dd->pdev->dev,
1184 "Internal command did not complete [%d]\n",
1185 atomic);
1186 rv = -EAGAIN;
1187 }
1188 }
1189
1190 /* Clear the allocated and active bits for the internal command. */
1191 atomic_set(&int_cmd->active, 0);
1192 release_slot(port, MTIP_TAG_INTERNAL);
1193 clear_bit(MTIP_FLAG_IC_ACTIVE_BIT, &port->flags);
1194 wake_up_interruptible(&port->svc_wait);
1195
1196 return rv;
1197 }
1198
1199 /*
1200 * Byte-swap ATA ID strings.
1201 *
1202 * ATA identify data contains strings in byte-swapped 16-bit words.
1203 * They must be swapped (on all architectures) to be usable as C strings.
1204 * This function swaps bytes in-place.
1205 *
1206 * @buf The buffer location of the string
1207 * @len The number of bytes to swap
1208 *
1209 * return value
1210 * None
1211 */
1212 static inline void ata_swap_string(u16 *buf, unsigned int len)
1213 {
1214 int i;
1215 for (i = 0; i < (len/2); i++)
1216 be16_to_cpus(&buf[i]);
1217 }
1218
1219 /*
1220 * Request the device identity information.
1221 *
1222 * If a user space buffer is not specified (i.e. it is NULL), the
1223 * identify information is still read from the drive and placed
1224 * into the identify data buffer (port->identify) in the
1225 * port data structure.
1226 * When the identify buffer contains valid identify information,
1227 * port->identify_valid is non-zero.
1228 *
1229 * @port Pointer to the port structure.
1230 * @user_buffer A user space buffer where the identify data should be
1231 * copied.
1232 *
1233 * return value
1234 * 0 Command completed successfully.
1235 * -EFAULT An error occurred while copying data to the user buffer.
1236 * -1 Command failed.
1237 */
1238 static int mtip_get_identify(struct mtip_port *port, void __user *user_buffer)
1239 {
1240 int rv = 0;
1241 struct host_to_dev_fis fis;
1242
1243 /* Build the FIS. */
1244 memset(&fis, 0, sizeof(struct host_to_dev_fis));
1245 fis.type = 0x27;
1246 fis.opts = 1 << 7;
1247 fis.command = ATA_CMD_ID_ATA;
1248
1249 /* Set the identify information as invalid. */
1250 port->identify_valid = 0;
1251
1252 /* Clear the identify information. */
1253 memset(port->identify, 0, sizeof(u16) * ATA_ID_WORDS);
1254
1255 /* Execute the command. */
1256 if (mtip_exec_internal_command(port,
1257 &fis,
1258 5,
1259 port->identify_dma,
1260 sizeof(u16) * ATA_ID_WORDS,
1261 0,
1262 GFP_KERNEL,
1263 MTIP_INTERNAL_COMMAND_TIMEOUT_MS)
1264 < 0) {
1265 rv = -1;
1266 goto out;
1267 }
1268
1269 /*
1270 * Perform any necessary byte-swapping. Yes, the kernel does in fact
1271 * perform field-sensitive swapping on the string fields.
1272 * See the kernel use of ata_id_string() for proof of this.
1273 */
1274 #ifdef __LITTLE_ENDIAN
1275 ata_swap_string(port->identify + 27, 40); /* model string*/
1276 ata_swap_string(port->identify + 23, 8); /* firmware string*/
1277 ata_swap_string(port->identify + 10, 20); /* serial# string*/
1278 #else
1279 {
1280 int i;
1281 for (i = 0; i < ATA_ID_WORDS; i++)
1282 port->identify[i] = le16_to_cpu(port->identify[i]);
1283 }
1284 #endif
1285
1286 /* Set the identify buffer as valid. */
1287 port->identify_valid = 1;
1288
1289 if (user_buffer) {
1290 if (copy_to_user(
1291 user_buffer,
1292 port->identify,
1293 ATA_ID_WORDS * sizeof(u16))) {
1294 rv = -EFAULT;
1295 goto out;
1296 }
1297 }
1298
1299 out:
1300 return rv;
1301 }
1302
1303 /*
1304 * Issue a standby immediate command to the device.
1305 *
1306 * @port Pointer to the port structure.
1307 *
1308 * return value
1309 * 0 Command was executed successfully.
1310 * -1 An error occurred while executing the command.
1311 */
1312 static int mtip_standby_immediate(struct mtip_port *port)
1313 {
1314 int rv;
1315 struct host_to_dev_fis fis;
1316
1317 /* Build the FIS. */
1318 memset(&fis, 0, sizeof(struct host_to_dev_fis));
1319 fis.type = 0x27;
1320 fis.opts = 1 << 7;
1321 fis.command = ATA_CMD_STANDBYNOW1;
1322
1323 /* Execute the command. Use a 15-second timeout for large drives. */
1324 rv = mtip_exec_internal_command(port,
1325 &fis,
1326 5,
1327 0,
1328 0,
1329 0,
1330 GFP_KERNEL,
1331 15000);
1332
1333 return rv;
1334 }
1335
1336 /*
1337 * Get the drive capacity.
1338 *
1339 * @dd Pointer to the device data structure.
1340 * @sectors Pointer to the variable that will receive the sector count.
1341 *
1342 * return value
1343 * 1 Capacity was returned successfully.
1344 * 0 The identify information is invalid.
1345 */
1346 static bool mtip_hw_get_capacity(struct driver_data *dd, sector_t *sectors)
1347 {
1348 struct mtip_port *port = dd->port;
1349 u64 total, raw0, raw1, raw2, raw3;
1350 raw0 = port->identify[100];
1351 raw1 = port->identify[101];
1352 raw2 = port->identify[102];
1353 raw3 = port->identify[103];
1354 total = raw0 | raw1<<16 | raw2<<32 | raw3<<48;
1355 *sectors = total;
1356 return (bool) !!port->identify_valid;
1357 }
1358
1359 /*
1360 * Reset the HBA.
1361 *
1362 * Resets the HBA by setting the HBA Reset bit in the Global
1363 * HBA Control register. After setting the HBA Reset bit the
1364 * function waits for 1 second before reading the HBA Reset
1365 * bit to make sure it has cleared. If the HBA Reset bit has not
1366 * cleared, an error is returned. This function sleeps and must not
1367 * be called from a context that cannot block.
1368 *
1369 * @dd Pointer to the driver data structure.
1370 *
1371 * return value
1372 * 0 The reset was successful.
1373 * -1 The HBA Reset bit did not clear.
1374 */
1375 static int mtip_hba_reset(struct driver_data *dd)
1376 {
1377 mtip_deinit_port(dd->port);
1378
1379 /* Set the reset bit */
1380 writel(HOST_RESET, dd->mmio + HOST_CTL);
1381
1382 /* Flush */
1383 readl(dd->mmio + HOST_CTL);
1384
1385 /* Wait for reset to clear */
1386 ssleep(1);
1387
1388 /* Check the bit has cleared */
1389 if (readl(dd->mmio + HOST_CTL) & HOST_RESET) {
1390 dev_err(&dd->pdev->dev,
1391 "Reset bit did not clear.\n");
1392 return -1;
1393 }
1394
1395 return 0;
1396 }
1397
1398 /*
1399 * Display the identify command data.
1400 *
1401 * @port Pointer to the port data structure.
1402 *
1403 * return value
1404 * None
1405 */
1406 static void mtip_dump_identify(struct mtip_port *port)
1407 {
1408 sector_t sectors;
1409 unsigned short revid;
1410 char cbuf[42];
1411
1412 if (!port->identify_valid)
1413 return;
1414
1415 strlcpy(cbuf, (char *)(port->identify+10), 21);
1416 dev_info(&port->dd->pdev->dev,
1417 "Serial No.: %s\n", cbuf);
1418
1419 strlcpy(cbuf, (char *)(port->identify+23), 9);
1420 dev_info(&port->dd->pdev->dev,
1421 "Firmware Ver.: %s\n", cbuf);
1422
1423 strlcpy(cbuf, (char *)(port->identify+27), 41);
1424 dev_info(&port->dd->pdev->dev, "Model: %s\n", cbuf);
1425
1426 if (mtip_hw_get_capacity(port->dd, &sectors))
1427 dev_info(&port->dd->pdev->dev,
1428 "Capacity: %llu sectors (%llu MB)\n",
1429 (u64)sectors,
1430 ((u64)sectors) * ATA_SECT_SIZE >> 20);
1431
1432 pci_read_config_word(port->dd->pdev, PCI_REVISION_ID, &revid);
1433 switch (revid & 0xFF) {
1434 case 0x1:
1435 strlcpy(cbuf, "A0", 3);
1436 break;
1437 case 0x3:
1438 strlcpy(cbuf, "A2", 3);
1439 break;
1440 default:
1441 strlcpy(cbuf, "?", 2);
1442 break;
1443 }
1444 dev_info(&port->dd->pdev->dev,
1445 "Card Type: %s\n", cbuf);
1446 }
1447
1448 /*
1449 * Map the command's scatter list into the command table.
1450 *
1451 * @command Pointer to the command.
1452 * @nents Number of scatter list entries.
1453 *
1454 * return value
1455 * None
1456 */
1457 static inline void fill_command_sg(struct driver_data *dd,
1458 struct mtip_cmd *command,
1459 int nents)
1460 {
1461 int n;
1462 unsigned int dma_len;
1463 struct mtip_cmd_sg *command_sg;
1464 struct scatterlist *sg = command->sg;
1465
1466 command_sg = command->command + AHCI_CMD_TBL_HDR_SZ;
1467
1468 for (n = 0; n < nents; n++) {
1469 dma_len = sg_dma_len(sg);
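		/* each scatter entry can describe at most 4 MB (the info field holds len - 1 in 22 bits) */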
1470 if (dma_len > 0x400000)
1471 dev_err(&dd->pdev->dev,
1472 "DMA segment length truncated\n");
1473 command_sg->info = __force_bit2int
1474 cpu_to_le32((dma_len-1) & 0x3FFFFF);
1475 command_sg->dba = __force_bit2int
1476 cpu_to_le32(sg_dma_address(sg));
1477 command_sg->dba_upper = __force_bit2int
1478 cpu_to_le32((sg_dma_address(sg) >> 16) >> 16);
1479 command_sg++;
1480 sg++;
1481 }
1482 }
1483
1484 /*
1485 * @brief Execute a drive command.
1486 *
1487 * return value 0 The command completed successfully.
1488 * return value -1 An error occurred while executing the command.
1489 */
1490 static int exec_drive_task(struct mtip_port *port, u8 *command)
1491 {
1492 struct host_to_dev_fis fis;
1493 struct host_to_dev_fis *reply = (port->rxfis + RX_FIS_D2H_REG);
1494
1495 /* Build the FIS. */
1496 memset(&fis, 0, sizeof(struct host_to_dev_fis));
1497 fis.type = 0x27;
1498 fis.opts = 1 << 7;
1499 fis.command = command[0];
1500 fis.features = command[1];
1501 fis.sect_count = command[2];
1502 fis.sector = command[3];
1503 fis.cyl_low = command[4];
1504 fis.cyl_hi = command[5];
1505 fis.device = command[6] & ~0x10; /* Clear the dev bit*/
1506
1507
1508 dbg_printk(MTIP_DRV_NAME "%s: User Command: cmd %x, feat %x, "
1509 "nsect %x, sect %x, lcyl %x, "
1510 "hcyl %x, sel %x\n",
1511 __func__,
1512 command[0],
1513 command[1],
1514 command[2],
1515 command[3],
1516 command[4],
1517 command[5],
1518 command[6]);
1519
1520 /* Execute the command. */
1521 if (mtip_exec_internal_command(port,
1522 &fis,
1523 5,
1524 0,
1525 0,
1526 0,
1527 GFP_KERNEL,
1528 MTIP_IOCTL_COMMAND_TIMEOUT_MS) < 0) {
1529 return -1;
1530 }
1531
1532 command[0] = reply->command; /* Status*/
1533 command[1] = reply->features; /* Error*/
1534 command[4] = reply->cyl_low;
1535 command[5] = reply->cyl_hi;
1536
1537 dbg_printk(MTIP_DRV_NAME "%s: Completion Status: stat %x, "
1538 "err %x , cyl_lo %x cyl_hi %x\n",
1539 __func__,
1540 command[0],
1541 command[1],
1542 command[4],
1543 command[5]);
1544
1545 return 0;
1546 }
1547
1548 /*
1549 * @brief Execute a drive command.
1550 *
1551 * @param port Pointer to the port data structure.
1552 * @param command Pointer to the user specified command parameters.
1553 * @param user_buffer Pointer to the user space buffer where read sector
1554 * data should be copied.
1555 *
1556 * return value 0 The command completed successfully.
1557 * return value -EFAULT An error occurred while copying the completion
1558 * data to the user space buffer.
1559 * return value -1 An error occurred while executing the command.
1560 */
1561 static int exec_drive_command(struct mtip_port *port, u8 *command,
1562 void __user *user_buffer)
1563 {
1564 struct host_to_dev_fis fis;
1565 struct host_to_dev_fis *reply = (port->rxfis + RX_FIS_D2H_REG);
1566
1567 /* Build the FIS. */
1568 memset(&fis, 0, sizeof(struct host_to_dev_fis));
1569 fis.type = 0x27;
1570 fis.opts = 1 << 7;
1571 fis.command = command[0];
1572 fis.features = command[2];
1573 fis.sect_count = command[3];
1574 if (fis.command == ATA_CMD_SMART) {
1575 fis.sector = command[1];
1576 fis.cyl_low = 0x4F;
1577 fis.cyl_hi = 0xC2;
1578 }
1579
1580 dbg_printk(MTIP_DRV_NAME
1581 "%s: User Command: cmd %x, sect %x, "
1582 "feat %x, sectcnt %x\n",
1583 __func__,
1584 command[0],
1585 command[1],
1586 command[2],
1587 command[3]);
1588
1589 memset(port->sector_buffer, 0x00, ATA_SECT_SIZE);
1590
1591 /* Execute the command. */
1592 if (mtip_exec_internal_command(port,
1593 &fis,
1594 5,
1595 port->sector_buffer_dma,
1596 (command[3] != 0) ? ATA_SECT_SIZE : 0,
1597 0,
1598 GFP_KERNEL,
1599 MTIP_IOCTL_COMMAND_TIMEOUT_MS)
1600 < 0) {
1601 return -1;
1602 }
1603
1604 /* Collect the completion status. */
1605 command[0] = reply->command; /* Status*/
1606 command[1] = reply->features; /* Error*/
1607 command[2] = command[3];
1608
1609 dbg_printk(MTIP_DRV_NAME
1610 "%s: Completion Status: stat %x, "
1611 "err %x, cmd %x\n",
1612 __func__,
1613 command[0],
1614 command[1],
1615 command[2]);
1616
1617 if (user_buffer && command[3]) {
1618 if (copy_to_user(user_buffer,
1619 port->sector_buffer,
1620 ATA_SECT_SIZE * command[3])) {
1621 return -EFAULT;
1622 }
1623 }
1624
1625 return 0;
1626 }
1627
1628 /*
1629 * Indicates whether a command has a single sector payload.
1630 *
1631 * @command The ATA command opcode to be sent to the device.
1632 * @features The features value to be sent with the command.
1633 *
1634 * return value
1635 * 1 command is one that always has a single sector payload,
1636 * regardless of the value in the Sector Count field.
1637 * 0 otherwise
1638 *
1639 */
1640 static unsigned int implicit_sector(unsigned char command,
1641 unsigned char features)
1642 {
1643 unsigned int rv = 0;
1644
1645 /* list of commands that have an implicit sector count of 1 */
1646 switch (command) {
1647 case ATA_CMD_SEC_SET_PASS:
1648 case ATA_CMD_SEC_UNLOCK:
1649 case ATA_CMD_SEC_ERASE_PREP:
1650 case ATA_CMD_SEC_ERASE_UNIT:
1651 case ATA_CMD_SEC_FREEZE_LOCK:
1652 case ATA_CMD_SEC_DISABLE_PASS:
1653 case ATA_CMD_PMP_READ:
1654 case ATA_CMD_PMP_WRITE:
1655 rv = 1;
1656 break;
1657 case ATA_CMD_SET_MAX:
1658 if (features == ATA_SET_MAX_UNLOCK)
1659 rv = 1;
1660 break;
1661 case ATA_CMD_SMART:
1662 if ((features == ATA_SMART_READ_VALUES) ||
1663 (features == ATA_SMART_READ_THRESHOLDS))
1664 rv = 1;
1665 break;
1666 case ATA_CMD_CONF_OVERLAY:
1667 if ((features == ATA_DCO_IDENTIFY) ||
1668 (features == ATA_DCO_SET))
1669 rv = 1;
1670 break;
1671 }
1672 return rv;
1673 }
1674
1675 /*
1676 * Executes a taskfile
1677 * See ide_taskfile_ioctl() for derivation
1678 */
1679 static int exec_drive_taskfile(struct driver_data *dd,
1680 void __user *buf,
1681 ide_task_request_t *req_task,
1682 int outtotal)
1683 {
1684 struct host_to_dev_fis fis;
1685 struct host_to_dev_fis *reply;
1686 u8 *outbuf = NULL;
1687 u8 *inbuf = NULL;
1688 dma_addr_t outbuf_dma = 0;
1689 dma_addr_t inbuf_dma = 0;
1690 dma_addr_t dma_buffer = 0;
1691 int err = 0;
1692 unsigned int taskin = 0;
1693 unsigned int taskout = 0;
1694 u8 nsect = 0;
1695 unsigned int timeout = MTIP_IOCTL_COMMAND_TIMEOUT_MS;
1696 unsigned int force_single_sector;
1697 unsigned int transfer_size;
1698 unsigned long task_file_data;
1699 int intotal = outtotal + req_task->out_size;
1700
1701 taskout = req_task->out_size;
1702 taskin = req_task->in_size;
1703 	/* 130560 = 512 * 0xFF */
1704 if (taskin > 130560 || taskout > 130560) {
1705 err = -EINVAL;
1706 goto abort;
1707 }
1708
1709 if (taskout) {
1710 outbuf = kzalloc(taskout, GFP_KERNEL);
1711 if (outbuf == NULL) {
1712 err = -ENOMEM;
1713 goto abort;
1714 }
1715 if (copy_from_user(outbuf, buf + outtotal, taskout)) {
1716 err = -EFAULT;
1717 goto abort;
1718 }
1719 outbuf_dma = pci_map_single(dd->pdev,
1720 outbuf,
1721 taskout,
1722 DMA_TO_DEVICE);
1723 if (outbuf_dma == 0) {
1724 err = -ENOMEM;
1725 goto abort;
1726 }
1727 dma_buffer = outbuf_dma;
1728 }
1729
1730 if (taskin) {
1731 inbuf = kzalloc(taskin, GFP_KERNEL);
1732 if (inbuf == NULL) {
1733 err = -ENOMEM;
1734 goto abort;
1735 }
1736
1737 if (copy_from_user(inbuf, buf + intotal, taskin)) {
1738 err = -EFAULT;
1739 goto abort;
1740 }
1741 inbuf_dma = pci_map_single(dd->pdev,
1742 inbuf,
1743 taskin, DMA_FROM_DEVICE);
1744 if (inbuf_dma == 0) {
1745 err = -ENOMEM;
1746 goto abort;
1747 }
1748 dma_buffer = inbuf_dma;
1749 }
1750
1751 	/* Only PIO and non-data commands are supported by this ioctl. */
1752 switch (req_task->data_phase) {
1753 case TASKFILE_OUT:
1754 nsect = taskout / ATA_SECT_SIZE;
1755 reply = (dd->port->rxfis + RX_FIS_PIO_SETUP);
1756 break;
1757 case TASKFILE_IN:
1758 reply = (dd->port->rxfis + RX_FIS_PIO_SETUP);
1759 break;
1760 case TASKFILE_NO_DATA:
1761 reply = (dd->port->rxfis + RX_FIS_D2H_REG);
1762 break;
1763 default:
1764 err = -EINVAL;
1765 goto abort;
1766 }
1767
1768 /* Build the FIS. */
1769 memset(&fis, 0, sizeof(struct host_to_dev_fis));
1770
1771 fis.type = 0x27;
1772 fis.opts = 1 << 7;
1773 fis.command = req_task->io_ports[7];
1774 fis.features = req_task->io_ports[1];
1775 fis.sect_count = req_task->io_ports[2];
1776 fis.lba_low = req_task->io_ports[3];
1777 fis.lba_mid = req_task->io_ports[4];
1778 fis.lba_hi = req_task->io_ports[5];
1779 /* Clear the dev bit*/
1780 fis.device = req_task->io_ports[6] & ~0x10;
1781
1782 if ((req_task->in_flags.all == 0) && (req_task->out_flags.all & 1)) {
1783 req_task->in_flags.all =
1784 IDE_TASKFILE_STD_IN_FLAGS |
1785 (IDE_HOB_STD_IN_FLAGS << 8);
1786 fis.lba_low_ex = req_task->hob_ports[3];
1787 fis.lba_mid_ex = req_task->hob_ports[4];
1788 fis.lba_hi_ex = req_task->hob_ports[5];
1789 fis.features_ex = req_task->hob_ports[1];
1790 fis.sect_cnt_ex = req_task->hob_ports[2];
1791
1792 } else {
1793 req_task->in_flags.all = IDE_TASKFILE_STD_IN_FLAGS;
1794 }
1795
1796 force_single_sector = implicit_sector(fis.command, fis.features);
1797
1798 if ((taskin || taskout) && (!fis.sect_count)) {
1799 if (nsect)
1800 fis.sect_count = nsect;
1801 else {
1802 if (!force_single_sector) {
1803 dev_warn(&dd->pdev->dev,
1804 "data movement but "
1805 "sect_count is 0\n");
1806 err = -EINVAL;
1807 goto abort;
1808 }
1809 }
1810 }
1811
1812 dbg_printk(MTIP_DRV_NAME
1813 "taskfile: cmd %x, feat %x, nsect %x,"
1814 " sect/lbal %x, lcyl/lbam %x, hcyl/lbah %x,"
1815 " head/dev %x\n",
1816 fis.command,
1817 fis.features,
1818 fis.sect_count,
1819 fis.lba_low,
1820 fis.lba_mid,
1821 fis.lba_hi,
1822 fis.device);
1823
1824 switch (fis.command) {
1825 case ATA_CMD_DOWNLOAD_MICRO:
1826 /* Change timeout for Download Microcode to 60 seconds.*/
1827 timeout = 60000;
1828 break;
1829 case ATA_CMD_SEC_ERASE_UNIT:
1830 /* Change timeout for Security Erase Unit to 4 minutes.*/
1831 timeout = 240000;
1832 break;
1833 case ATA_CMD_STANDBYNOW1:
1834 /* Change timeout for standby immediate to 10 seconds.*/
1835 timeout = 10000;
1836 break;
1837 case 0xF7:
1838 case 0xFA:
1839 /* Change timeout for vendor unique command to 10 secs */
1840 timeout = 10000;
1841 break;
1842 case ATA_CMD_SMART:
1843 /* Change timeout for vendor unique command to 10 secs */
1844 timeout = 10000;
1845 break;
1846 default:
1847 timeout = MTIP_IOCTL_COMMAND_TIMEOUT_MS;
1848 break;
1849 }
1850
1851 /* Determine the correct transfer size.*/
1852 if (force_single_sector)
1853 transfer_size = ATA_SECT_SIZE;
1854 else
1855 transfer_size = ATA_SECT_SIZE * fis.sect_count;
1856
1857 /* Execute the command.*/
1858 if (mtip_exec_internal_command(dd->port,
1859 &fis,
1860 5,
1861 dma_buffer,
1862 transfer_size,
1863 0,
1864 GFP_KERNEL,
1865 timeout) < 0) {
1866 err = -EIO;
1867 goto abort;
1868 }
1869
1870 task_file_data = readl(dd->port->mmio+PORT_TFDATA);
1871
1872 if ((req_task->data_phase == TASKFILE_IN) && !(task_file_data & 1)) {
1873 reply = dd->port->rxfis + RX_FIS_PIO_SETUP;
1874 req_task->io_ports[7] = reply->control;
1875 } else {
1876 reply = dd->port->rxfis + RX_FIS_D2H_REG;
1877 req_task->io_ports[7] = reply->command;
1878 }
1879
1880 /* reclaim the DMA buffers.*/
1881 if (inbuf_dma)
1882 pci_unmap_single(dd->pdev, inbuf_dma,
1883 taskin, DMA_FROM_DEVICE);
1884 if (outbuf_dma)
1885 pci_unmap_single(dd->pdev, outbuf_dma,
1886 taskout, DMA_TO_DEVICE);
1887 inbuf_dma = 0;
1888 outbuf_dma = 0;
1889
1890 /* return the ATA registers to the caller.*/
1891 req_task->io_ports[1] = reply->features;
1892 req_task->io_ports[2] = reply->sect_count;
1893 req_task->io_ports[3] = reply->lba_low;
1894 req_task->io_ports[4] = reply->lba_mid;
1895 req_task->io_ports[5] = reply->lba_hi;
1896 req_task->io_ports[6] = reply->device;
1897
1898 if (req_task->out_flags.all & 1) {
1899
1900 req_task->hob_ports[3] = reply->lba_low_ex;
1901 req_task->hob_ports[4] = reply->lba_mid_ex;
1902 req_task->hob_ports[5] = reply->lba_hi_ex;
1903 req_task->hob_ports[1] = reply->features_ex;
1904 req_task->hob_ports[2] = reply->sect_cnt_ex;
1905 }
1906
1907 	/* COM reset after secure erase or low-level format */
1908 if (((fis.command == ATA_CMD_SEC_ERASE_UNIT) ||
1909 ((fis.command == 0xFC) &&
1910 (fis.features == 0x27 || fis.features == 0x72 ||
1911 fis.features == 0x62 || fis.features == 0x26))) &&
1912 !(reply->command & 1)) {
1913 mtip_restart_port(dd->port);
1914 }
1915
1916 dbg_printk(MTIP_DRV_NAME
1917 "%s: Completion: stat %x,"
1918 "err %x, sect_cnt %x, lbalo %x,"
1919 "lbamid %x, lbahi %x, dev %x\n",
1920 __func__,
1921 req_task->io_ports[7],
1922 req_task->io_ports[1],
1923 req_task->io_ports[2],
1924 req_task->io_ports[3],
1925 req_task->io_ports[4],
1926 req_task->io_ports[5],
1927 req_task->io_ports[6]);
1928
1929 if (taskout) {
1930 if (copy_to_user(buf + outtotal, outbuf, taskout)) {
1931 err = -EFAULT;
1932 goto abort;
1933 }
1934 }
1935 if (taskin) {
1936 if (copy_to_user(buf + intotal, inbuf, taskin)) {
1937 err = -EFAULT;
1938 goto abort;
1939 }
1940 }
1941 abort:
1942 if (inbuf_dma)
1943 pci_unmap_single(dd->pdev, inbuf_dma,
1944 taskin, DMA_FROM_DEVICE);
1945 if (outbuf_dma)
1946 pci_unmap_single(dd->pdev, outbuf_dma,
1947 taskout, DMA_TO_DEVICE);
1948 kfree(outbuf);
1949 kfree(inbuf);
1950
1951 return err;
1952 }
1953
1954 /*
1955 * Handle IOCTL calls from the Block Layer.
1956 *
1957 * This function is called by the Block Layer when it receives an IOCTL
1958 * command that it does not understand. If the IOCTL command is not supported
1959 * this function returns -ENOTTY.
1960 *
1961 * @dd Pointer to the driver data structure.
1962 * @cmd IOCTL command passed from the Block Layer.
1963 * @arg IOCTL argument passed from the Block Layer.
1964 *
1965 * return value
1966 * 0 The IOCTL completed successfully.
1967 * -ENOTTY The specified command is not supported.
1968 * -EFAULT An error occurred copying data to a user space buffer.
1969 * -EIO An error occurred while executing the command.
1970 */
1971 static int mtip_hw_ioctl(struct driver_data *dd, unsigned int cmd,
1972 unsigned long arg)
1973 {
1974 switch (cmd) {
1975 case HDIO_GET_IDENTITY:
1976 if (mtip_get_identify(dd->port, (void __user *) arg) < 0) {
1977 dev_warn(&dd->pdev->dev,
1978 "Unable to read identity\n");
1979 return -EIO;
1980 }
1981
1982 break;
1983 case HDIO_DRIVE_CMD:
1984 {
1985 u8 drive_command[4];
1986
1987 /* Copy the user command info to our buffer. */
1988 if (copy_from_user(drive_command,
1989 (void __user *) arg,
1990 sizeof(drive_command)))
1991 return -EFAULT;
1992
1993 /* Execute the drive command. */
1994 if (exec_drive_command(dd->port,
1995 drive_command,
1996 (void __user *) (arg+4)))
1997 return -EIO;
1998
1999 /* Copy the status back to the users buffer. */
2000 if (copy_to_user((void __user *) arg,
2001 drive_command,
2002 sizeof(drive_command)))
2003 return -EFAULT;
2004
2005 break;
2006 }
2007 case HDIO_DRIVE_TASK:
2008 {
2009 u8 drive_command[7];
2010
2011 /* Copy the user command info to our buffer. */
2012 if (copy_from_user(drive_command,
2013 (void __user *) arg,
2014 sizeof(drive_command)))
2015 return -EFAULT;
2016
2017 /* Execute the drive command. */
2018 if (exec_drive_task(dd->port, drive_command))
2019 return -EIO;
2020
2021 /* Copy the status back to the users buffer. */
2022 if (copy_to_user((void __user *) arg,
2023 drive_command,
2024 sizeof(drive_command)))
2025 return -EFAULT;
2026
2027 break;
2028 }
2029 case HDIO_DRIVE_TASKFILE: {
2030 ide_task_request_t req_task;
2031 int ret, outtotal;
2032
2033 if (copy_from_user(&req_task, (void __user *) arg,
2034 sizeof(req_task)))
2035 return -EFAULT;
2036
2037 outtotal = sizeof(req_task);
2038
2039 ret = exec_drive_taskfile(dd, (void __user *) arg,
2040 &req_task, outtotal);
2041
2042 if (copy_to_user((void __user *) arg, &req_task,
2043 sizeof(req_task)))
2044 return -EFAULT;
2045
2046 return ret;
2047 }
2048
2049 default:
2050 return -EINVAL;
2051 }
2052 return 0;
2053 }
2054
2055 /*
2056 * Submit an IO to the hw
2057 *
2058 * This function is called by the block layer to issue an I/O
2059 * to the device. Upon completion, the callback function will
2060 * be called with the data parameter passed as the callback data.
2061 *
2062 * @dd Pointer to the driver data structure.
2063 * @start First sector to transfer.
2064 * @nsect Number of sectors to transfer.
2065 * @nents Number of entries in the scatter list for this command.
2066 * @tag The tag of this command.
2067 * @callback Pointer to the function that should be called
2068 * when the command completes.
2069 * @data Callback data passed to the callback function
2070 * when the command completes.
2071 * @barrier If non-zero, this command must be completed before
2072 * issuing any other commands.
2073 * @dir Direction (read or write)
2074 *
2075 * return value
2076 * None
2077 */
2078 static void mtip_hw_submit_io(struct driver_data *dd, sector_t start,
2079 int nsect, int nents, int tag, void *callback,
2080 void *data, int barrier, int dir)
2081 {
2082 struct host_to_dev_fis *fis;
2083 struct mtip_port *port = dd->port;
2084 struct mtip_cmd *command = &port->commands[tag];
2085
2086 /* Map the scatter list for DMA access */
2087 if (dir == READ)
2088 nents = dma_map_sg(&dd->pdev->dev, command->sg,
2089 nents, DMA_FROM_DEVICE);
2090 else
2091 nents = dma_map_sg(&dd->pdev->dev, command->sg,
2092 nents, DMA_TO_DEVICE);
2093
2094 command->scatter_ents = nents;
2095
2096 /*
2097 * The number of retries for this command before it is
2098 * reported as a failure to the upper layers.
2099 */
2100 command->retries = MTIP_MAX_RETRIES;
2101
2102 /* Fill out fis */
2103 fis = command->command;
2104 fis->type = 0x27;
2105 fis->opts = 1 << 7;
2106 fis->command =
2107 (dir == READ ? ATA_CMD_FPDMA_READ : ATA_CMD_FPDMA_WRITE);
2108 *((unsigned int *) &fis->lba_low) = (start & 0xFFFFFF);
2109 *((unsigned int *) &fis->lba_low_ex) = ((start >> 24) & 0xFFFFFF);
2110 fis->device = 1 << 6;
2111 if (barrier)
2112 fis->device |= FUA_BIT;
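	/*
	 * NCQ command layout: the sector count is carried in the
	 * features fields and the tag in bits 7:3 of the count field.
	 */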
2113 fis->features = nsect & 0xFF;
2114 fis->features_ex = (nsect >> 8) & 0xFF;
2115 fis->sect_count = ((tag << 3) | (tag >> 5));
2116 fis->sect_cnt_ex = 0;
2117 fis->control = 0;
2118 fis->res2 = 0;
2119 fis->res3 = 0;
2120 fill_command_sg(dd, command, nents);
2121
2122 /* Populate the command header */
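	/* header DW0: PRDT length = nents, command FIS length = 5 DWORDs, prefetch enabled */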
2123 command->command_header->opts =
2124 __force_bit2int cpu_to_le32(
2125 (nents << 16) | 5 | AHCI_CMD_PREFETCH);
2126 command->command_header->byte_count = 0;
2127
2128 /*
2129 * Set the completion function and data for the command
2130 * within this layer.
2131 */
2132 command->comp_data = dd;
2133 command->comp_func = mtip_async_complete;
2134 command->direction = (dir == READ ? DMA_FROM_DEVICE : DMA_TO_DEVICE);
2135
2136 /*
2137 * Set the completion function and data for the command passed
2138 * from the upper layer.
2139 */
2140 command->async_data = data;
2141 command->async_callback = callback;
2142
2143 /*
2144 * Defer issuing this command if an internal command is in progress
2145 * or error handling is active; the service thread will issue it later.
2146 */
2147 if (unlikely(test_bit(MTIP_FLAG_IC_ACTIVE_BIT, &port->flags) ||
2148 test_bit(MTIP_FLAG_EH_ACTIVE_BIT, &port->flags))) {
2149 set_bit(tag, port->cmds_to_issue);
2150 set_bit(MTIP_FLAG_ISSUE_CMDS_BIT, &port->flags);
2151 return;
2152 }
2153
2154 /* Issue the command to the hardware */
2155 mtip_issue_ncq_command(port, tag);
2156
2157 /* Set the command's timeout value.*/
2158 port->commands[tag].comp_time = jiffies + msecs_to_jiffies(
2159 MTIP_NCQ_COMMAND_TIMEOUT_MS);
2160 }
2161
2162 /*
2163 * Release a command slot.
2164 *
2165 * @dd Pointer to the driver data structure.
2166 * @tag Slot tag
2167 *
2168 * return value
2169 * None
2170 */
2171 static void mtip_hw_release_scatterlist(struct driver_data *dd, int tag)
2172 {
2173 release_slot(dd->port, tag);
2174 }
2175
2176 /*
2177 * Obtain a command slot and return its associated scatter list.
2178 *
2179 * @dd Pointer to the driver data structure.
2180 * @tag Pointer to an int that will receive the allocated command
2181 * slot tag.
2182 *
2183 * return value
2184 * Pointer to the scatter list for the allocated command slot
2185 * or NULL if no command slots are available.
2186 */
2187 static struct scatterlist *mtip_hw_get_scatterlist(struct driver_data *dd,
2188 int *tag)
2189 {
2190 /*
2191 * It is possible that, even with this semaphore, a thread
2192 * may think that no command slots are available. Therefore, we
2193 * need to make an attempt to get_slot().
2194 */
2195 down(&dd->port->cmd_slot);
2196 *tag = get_slot(dd->port);
2197
2198 if (unlikely(*tag < 0))
2199 return NULL;
2200
2201 return dd->port->commands[*tag].sg;
2202 }
2203
2204 /*
2205 * Sysfs register/status dump.
2206 *
2207 * @dev Pointer to the device structure, passed by the kernel.
2208 * @attr Pointer to the device_attribute structure passed by the kernel.
2209 * @buf Pointer to the char buffer that will receive the stats info.
2210 *
2211 * return value
2212 * The size, in bytes, of the data copied into buf.
2213 */
2214 static ssize_t hw_show_registers(struct device *dev,
2215 struct device_attribute *attr,
2216 char *buf)
2217 {
2218 u32 group_allocated;
2219 struct driver_data *dd = dev_to_disk(dev)->private_data;
2220 int size = 0;
2221 int n;
2222
2223 size += sprintf(&buf[size], "%s:\ns_active:\n", __func__);
2224
2225 for (n = 0; n < dd->slot_groups; n++)
2226 size += sprintf(&buf[size], "0x%08x\n",
2227 readl(dd->port->s_active[n]));
2228
2229 size += sprintf(&buf[size], "Command Issue:\n");
2230
2231 for (n = 0; n < dd->slot_groups; n++)
2232 size += sprintf(&buf[size], "0x%08x\n",
2233 readl(dd->port->cmd_issue[n]));
2234
2235 size += sprintf(&buf[size], "Allocated:\n");
2236
2237 for (n = 0; n < dd->slot_groups; n++) {
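		/* on 64-bit builds each unsigned long in 'allocated' packs two 32-slot groups */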
2238 if (sizeof(long) > sizeof(u32))
2239 group_allocated =
2240 dd->port->allocated[n/2] >> (32*(n&1));
2241 else
2242 group_allocated = dd->port->allocated[n];
2243 size += sprintf(&buf[size], "0x%08x\n",
2244 group_allocated);
2245 }
2246
2247 size += sprintf(&buf[size], "completed:\n");
2248
2249 for (n = 0; n < dd->slot_groups; n++)
2250 size += sprintf(&buf[size], "0x%08x\n",
2251 readl(dd->port->completed[n]));
2252
2253 size += sprintf(&buf[size], "PORT_IRQ_STAT 0x%08x\n",
2254 readl(dd->port->mmio + PORT_IRQ_STAT));
2255 size += sprintf(&buf[size], "HOST_IRQ_STAT 0x%08x\n",
2256 readl(dd->mmio + HOST_IRQ_STAT));
2257
2258 return size;
2259 }
2260 static DEVICE_ATTR(registers, S_IRUGO, hw_show_registers, NULL);
2261
2262 /*
2263 * Create the sysfs related attributes.
2264 *
2265 * @dd Pointer to the driver data structure.
2266 * @kobj Pointer to the kobj for the block device.
2267 *
2268 * return value
2269 * 0 Operation completed successfully.
2270 * -EINVAL Invalid parameter.
2271 */
2272 static int mtip_hw_sysfs_init(struct driver_data *dd, struct kobject *kobj)
2273 {
2274 if (!kobj || !dd)
2275 return -EINVAL;
2276
2277 if (sysfs_create_file(kobj, &dev_attr_registers.attr))
2278 dev_warn(&dd->pdev->dev,
2279 "Error creating registers sysfs entry\n");
2280 return 0;
2281 }
2282
2283 /*
2284 * Remove the sysfs related attributes.
2285 *
2286 * @dd Pointer to the driver data structure.
2287 * @kobj Pointer to the kobj for the block device.
2288 *
2289 * return value
2290 * 0 Operation completed successfully.
2291 * -EINVAL Invalid parameter.
2292 */
2293 static int mtip_hw_sysfs_exit(struct driver_data *dd, struct kobject *kobj)
2294 {
2295 if (!kobj || !dd)
2296 return -EINVAL;
2297
2298 sysfs_remove_file(kobj, &dev_attr_registers.attr);
2299
2300 return 0;
2301 }
2302
2303 /*
2304 * Perform any init/resume time hardware setup
2305 *
2306 * @dd Pointer to the driver data structure.
2307 *
2308 * return value
2309 * None
2310 */
2311 static inline void hba_setup(struct driver_data *dd)
2312 {
2313 u32 hwdata;
2314 hwdata = readl(dd->mmio + HOST_HSORG);
2315
2316 /* interrupt bug workaround: use only 1 IS bit.*/
2317 writel(hwdata |
2318 HSORG_DISABLE_SLOTGRP_INTR |
2319 HSORG_DISABLE_SLOTGRP_PXIS,
2320 dd->mmio + HOST_HSORG);
2321 }
2322
2323 /*
2324 * Detect the details of the product, and store anything needed
2325 * into the driver data structure. This includes product type and
2326 * version and number of slot groups.
2327 *
2328 * @dd Pointer to the driver data structure.
2329 *
2330 * return value
2331 * None
2332 */
2333 static void mtip_detect_product(struct driver_data *dd)
2334 {
2335 u32 hwdata;
2336 unsigned int rev, slotgroups;
2337
2338 /*
2339 * HBA base + 0xFC [15:0] - vendor-specific hardware interface
2340 * info register:
2341 * [15:8] hardware/software interface rev#
2342 * [ 3] asic-style interface
2343 * [ 2:0] number of slot groups, minus 1 (only valid for asic-style).
2344 */
2345 hwdata = readl(dd->mmio + HOST_HSORG);
2346
2347 dd->product_type = MTIP_PRODUCT_UNKNOWN;
2348 dd->slot_groups = 1;
2349
2350 if (hwdata & 0x8) {
2351 dd->product_type = MTIP_PRODUCT_ASICFPGA;
2352 rev = (hwdata & HSORG_HWREV) >> 8;
2353 slotgroups = (hwdata & HSORG_SLOTGROUPS) + 1;
2354 dev_info(&dd->pdev->dev,
2355 "ASIC-FPGA design, HS rev 0x%x, "
2356 "%i slot groups [%i slots]\n",
2357 rev,
2358 slotgroups,
2359 slotgroups * 32);
2360
2361 if (slotgroups > MTIP_MAX_SLOT_GROUPS) {
2362 dev_warn(&dd->pdev->dev,
2363 "Warning: driver only supports "
2364 "%i slot groups.\n", MTIP_MAX_SLOT_GROUPS);
2365 slotgroups = MTIP_MAX_SLOT_GROUPS;
2366 }
2367 dd->slot_groups = slotgroups;
2368 return;
2369 }
2370
2371 dev_warn(&dd->pdev->dev, "Unrecognized product id\n");
2372 }
2373
2374 /*
2375 * Blocking wait for FTL rebuild to complete
2376 *
2377 * @dd Pointer to the DRIVER_DATA structure.
2378 *
2379 * return value
2380 * 0 FTL rebuild completed successfully
2381 * -EFAULT FTL rebuild error/timeout/interruption
2382 */
2383 static int mtip_ftl_rebuild_poll(struct driver_data *dd)
2384 {
2385 unsigned long timeout, cnt = 0, start;
2386
2387 dev_warn(&dd->pdev->dev,
2388 "FTL rebuild in progress. Polling for completion.\n");
2389
2390 start = jiffies;
2391 dd->ftlrebuildflag = 1;
2392 timeout = jiffies + msecs_to_jiffies(MTIP_FTL_REBUILD_TIMEOUT_MS);
2393
2394 do {
2395 if (mtip_check_surprise_removal(dd->pdev))
2396 return -EFAULT;
2397
2398 if (mtip_get_identify(dd->port, NULL) < 0)
2399 return -EFAULT;
2400
2401 if (*(dd->port->identify + MTIP_FTL_REBUILD_OFFSET) ==
2402 MTIP_FTL_REBUILD_MAGIC) {
2403 ssleep(1);
2404 			/* Print a progress message periodically */
2405 if (cnt++ >= 180) {
2406 dev_warn(&dd->pdev->dev,
2407 "FTL rebuild in progress (%d secs).\n",
2408 jiffies_to_msecs(jiffies - start) / 1000);
2409 cnt = 0;
2410 }
2411 } else {
2412 dev_warn(&dd->pdev->dev,
2413 "FTL rebuild complete (%d secs).\n",
2414 jiffies_to_msecs(jiffies - start) / 1000);
2415 dd->ftlrebuildflag = 0;
2416 mtip_block_initialize(dd);
2417 break;
2418 }
2419 ssleep(10);
2420 } while (time_before(jiffies, timeout));
2421
2422 /* Check for timeout */
2423 if (dd->ftlrebuildflag) {
2424 dev_err(&dd->pdev->dev,
2425 "Timed out waiting for FTL rebuild to complete (%d secs).\n",
2426 jiffies_to_msecs(jiffies - start) / 1000);
2427 return -EFAULT;
2428 }
2429
2430 return 0;
2431 }
2432
2433 /*
2434 * service thread to issue queued commands
2435 *
2436 * @data Pointer to the driver data structure.
2437 *
2438 * return value
2439 * 0
2440 */
2441
2442 static int mtip_service_thread(void *data)
2443 {
2444 struct driver_data *dd = (struct driver_data *)data;
2445 unsigned long slot, slot_start, slot_wrap;
2446 unsigned int num_cmd_slots = dd->slot_groups * 32;
2447 struct mtip_port *port = dd->port;
2448
2449 while (1) {
2450 /*
2451 * Wait until there is work flagged (port->flags non-zero) and
2452 * neither an internal command nor error handling is active.
2453 */
2454 wait_event_interruptible(port->svc_wait, (port->flags) &&
2455 !test_bit(MTIP_FLAG_IC_ACTIVE_BIT, &port->flags) &&
2456 !test_bit(MTIP_FLAG_EH_ACTIVE_BIT, &port->flags));
2457
2458 if (kthread_should_stop())
2459 break;
2460
2461 set_bit(MTIP_FLAG_SVC_THD_ACTIVE_BIT, &port->flags);
2462 if (test_bit(MTIP_FLAG_ISSUE_CMDS_BIT, &port->flags)) {
2463 slot = 1;
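			/* start the scan at slot 1; slot 0 is reserved for the internal command */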
2464 			/* slot_start is a sentinel; it records the first slot found so the scan makes only one full pass */
2465 slot_start = num_cmd_slots;
2466 slot_wrap = 0;
2467 while (1) {
2468 slot = find_next_bit(port->cmds_to_issue,
2469 num_cmd_slots, slot);
2470 if (slot_wrap == 1) {
2471 if ((slot_start >= slot) ||
2472 (slot >= num_cmd_slots))
2473 break;
2474 }
2475 if (unlikely(slot_start == num_cmd_slots))
2476 slot_start = slot;
2477
2478 if (unlikely(slot == num_cmd_slots)) {
2479 slot = 1;
2480 slot_wrap = 1;
2481 continue;
2482 }
2483
2484 /* Issue the command to the hardware */
2485 mtip_issue_ncq_command(port, slot);
2486
2487 /* Set the command's timeout value.*/
2488 port->commands[slot].comp_time = jiffies +
2489 msecs_to_jiffies(MTIP_NCQ_COMMAND_TIMEOUT_MS);
2490
2491 clear_bit(slot, port->cmds_to_issue);
2492 }
2493
2494 clear_bit(MTIP_FLAG_ISSUE_CMDS_BIT, &port->flags);
2495 } else if (test_bit(MTIP_FLAG_REBUILD_BIT, &port->flags)) {
2496 mtip_ftl_rebuild_poll(dd);
2497 clear_bit(MTIP_FLAG_REBUILD_BIT, &port->flags);
2498 }
2499 clear_bit(MTIP_FLAG_SVC_THD_ACTIVE_BIT, &port->flags);
2500
2501 if (test_bit(MTIP_FLAG_SVC_THD_SHOULD_STOP_BIT, &port->flags))
2502 break;
2503 }
2504 return 0;
2505 }
2506
2507 /*
2508 * Called once for each card.
2509 *
2510 * @dd Pointer to the driver data structure.
2511 *
2512 * return value
2513 * 0 on success, else an error code.
2514 */
2515 static int mtip_hw_init(struct driver_data *dd)
2516 {
2517 int i;
2518 int rv;
2519 unsigned int num_command_slots;
2520
2521 dd->mmio = pcim_iomap_table(dd->pdev)[MTIP_ABAR];
2522
2523 mtip_detect_product(dd);
2524 if (dd->product_type == MTIP_PRODUCT_UNKNOWN) {
2525 rv = -EIO;
2526 goto out1;
2527 }
2528 num_command_slots = dd->slot_groups * 32;
2529
2530 hba_setup(dd);
2531
2532 tasklet_init(&dd->tasklet, mtip_tasklet, (unsigned long)dd);
2533
2534 dd->port = kzalloc(sizeof(struct mtip_port), GFP_KERNEL);
2535 if (!dd->port) {
2536 dev_err(&dd->pdev->dev,
2537 "Memory allocation: port structure\n");
2538 return -ENOMEM;
2539 }
2540
2541 /* Counting semaphore to track command slot usage */
2542 sema_init(&dd->port->cmd_slot, num_command_slots - 1);
2543
2544 /* Spinlock to prevent concurrent issue */
2545 spin_lock_init(&dd->port->cmd_issue_lock);
2546
2547 /* Set the port mmio base address. */
2548 dd->port->mmio = dd->mmio + PORT_OFFSET;
2549 dd->port->dd = dd;
2550
2551 /* Allocate memory for the command list. */
2552 dd->port->command_list =
2553 dmam_alloc_coherent(&dd->pdev->dev,
2554 HW_PORT_PRIV_DMA_SZ + (ATA_SECT_SIZE * 2),
2555 &dd->port->command_list_dma,
2556 GFP_KERNEL);
2557 if (!dd->port->command_list) {
2558 dev_err(&dd->pdev->dev,
2559 "Memory allocation: command list\n");
2560 rv = -ENOMEM;
2561 goto out1;
2562 }
2563
2564 /* Clear the memory we have allocated. */
2565 memset(dd->port->command_list,
2566 0,
2567 HW_PORT_PRIV_DMA_SZ + (ATA_SECT_SIZE * 2));
2568
2569 /* Setup the address of the RX FIS. */
2570 dd->port->rxfis = dd->port->command_list + HW_CMD_SLOT_SZ;
2571 dd->port->rxfis_dma = dd->port->command_list_dma + HW_CMD_SLOT_SZ;
2572
2573 /* Setup the address of the command tables. */
2574 dd->port->command_table = dd->port->rxfis + AHCI_RX_FIS_SZ;
2575 dd->port->command_tbl_dma = dd->port->rxfis_dma + AHCI_RX_FIS_SZ;
2576
2577 /* Setup the address of the identify data. */
2578 dd->port->identify = dd->port->command_table +
2579 HW_CMD_TBL_AR_SZ;
2580 dd->port->identify_dma = dd->port->command_tbl_dma +
2581 HW_CMD_TBL_AR_SZ;
2582
2583 /* Setup the address of the sector buffer. */
2584 dd->port->sector_buffer = (void *) dd->port->identify + ATA_SECT_SIZE;
2585 dd->port->sector_buffer_dma = dd->port->identify_dma + ATA_SECT_SIZE;
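/*
 * For reference, the coherent region carved up above is laid out, in
 * order, as: command headers (HW_CMD_SLOT_SZ), RX FIS area
 * (AHCI_RX_FIS_SZ), command tables (HW_CMD_TBL_AR_SZ), identify data
 * (ATA_SECT_SIZE), and the internal sector buffer (ATA_SECT_SIZE).
 * The offsets follow directly from the assignments above.
 */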
2586
2587 /* Point the command headers at the command tables. */
2588 for (i = 0; i < num_command_slots; i++) {
2589 dd->port->commands[i].command_header =
2590 dd->port->command_list +
2591 (sizeof(struct mtip_cmd_hdr) * i);
2592 dd->port->commands[i].command_header_dma =
2593 dd->port->command_list_dma +
2594 (sizeof(struct mtip_cmd_hdr) * i);
2595
2596 dd->port->commands[i].command =
2597 dd->port->command_table + (HW_CMD_TBL_SZ * i);
2598 dd->port->commands[i].command_dma =
2599 dd->port->command_tbl_dma + (HW_CMD_TBL_SZ * i);
2600
2601 if (readl(dd->mmio + HOST_CAP) & HOST_CAP_64)
2602 dd->port->commands[i].command_header->ctbau =
2603 __force_bit2int cpu_to_le32(
2604 (dd->port->commands[i].command_dma >> 16) >> 16);
2605 dd->port->commands[i].command_header->ctba =
2606 __force_bit2int cpu_to_le32(
2607 dd->port->commands[i].command_dma & 0xFFFFFFFF);
2608
2609 /*
2610 * If this is not done, the stock FC11 i386 kernel,
2611 * which has extensive kernel debugging enabled,
2612 * reports a bug.
2613 */
2614 sg_init_table(dd->port->commands[i].sg, MTIP_MAX_SG);
2615
2616 /* Mark all commands as currently inactive.*/
2617 atomic_set(&dd->port->commands[i].active, 0);
2618 }
2619
2620 /* Setup the pointers to the extended s_active and CI registers. */
2621 for (i = 0; i < dd->slot_groups; i++) {
2622 dd->port->s_active[i] =
2623 dd->port->mmio + i*0x80 + PORT_SCR_ACT;
2624 dd->port->cmd_issue[i] =
2625 dd->port->mmio + i*0x80 + PORT_COMMAND_ISSUE;
2626 dd->port->completed[i] =
2627 dd->port->mmio + i*0x80 + PORT_SDBV;
2628 }
2629
2630 /* Reset the HBA. */
2631 if (mtip_hba_reset(dd) < 0) {
2632 dev_err(&dd->pdev->dev,
2633 "Card did not reset within timeout\n");
2634 rv = -EIO;
2635 goto out2;
2636 }
2637
2638 mtip_init_port(dd->port);
2639 mtip_start_port(dd->port);
2640
2641 /* Setup the ISR and enable interrupts. */
2642 rv = devm_request_irq(&dd->pdev->dev,
2643 dd->pdev->irq,
2644 mtip_irq_handler,
2645 IRQF_SHARED,
2646 dev_driver_string(&dd->pdev->dev),
2647 dd);
2648
2649 if (rv) {
2650 dev_err(&dd->pdev->dev,
2651 "Unable to allocate IRQ %d\n", dd->pdev->irq);
2652 goto out2;
2653 }
2654
2655 /* Enable interrupts on the HBA. */
2656 writel(readl(dd->mmio + HOST_CTL) | HOST_IRQ_EN,
2657 dd->mmio + HOST_CTL);
2658
2659 init_timer(&dd->port->cmd_timer);
2660 init_waitqueue_head(&dd->port->svc_wait);
2661
2662 dd->port->cmd_timer.data = (unsigned long int) dd->port;
2663 dd->port->cmd_timer.function = mtip_timeout_function;
2664 mod_timer(&dd->port->cmd_timer,
2665 jiffies + msecs_to_jiffies(MTIP_TIMEOUT_CHECK_PERIOD));
2666
2667 if (mtip_get_identify(dd->port, NULL) < 0) {
2668 rv = -EFAULT;
2669 goto out3;
2670 }
2671
2672 if (*(dd->port->identify + MTIP_FTL_REBUILD_OFFSET) ==
2673 MTIP_FTL_REBUILD_MAGIC) {
2674 set_bit(MTIP_FLAG_REBUILD_BIT, &dd->port->flags);
2675 return MTIP_FTL_REBUILD_MAGIC;
2676 }
2677 mtip_dump_identify(dd->port);
2678 return rv;
2679
2680 out3:
2681 del_timer_sync(&dd->port->cmd_timer);
2682
2683 /* Disable interrupts on the HBA. */
2684 writel(readl(dd->mmio + HOST_CTL) & ~HOST_IRQ_EN,
2685 dd->mmio + HOST_CTL);
2686
2687 /*Release the IRQ. */
2688 devm_free_irq(&dd->pdev->dev, dd->pdev->irq, dd);
2689
2690 out2:
2691 mtip_deinit_port(dd->port);
2692
2693 /* Free the command/command header memory. */
2694 dmam_free_coherent(&dd->pdev->dev,
2695 HW_PORT_PRIV_DMA_SZ + (ATA_SECT_SIZE * 2),
2696 dd->port->command_list,
2697 dd->port->command_list_dma);
2698 out1:
2699 /* Free the memory allocated for the port structure. */
2700 kfree(dd->port);
2701
2702 return rv;
2703 }
2704
2705 /*
2706 * Called to deinitialize an interface.
2707 *
2708 * @dd Pointer to the driver data structure.
2709 *
2710 * return value
2711 * 0
2712 */
2713 static int mtip_hw_exit(struct driver_data *dd)
2714 {
2715 /*
2716 * Send standby immediate (E0h) to the drive so that it
2717 * saves its state.
2718 */
2719 if (atomic_read(&dd->drv_cleanup_done) != true) {
2720
2721 mtip_standby_immediate(dd->port);
2722
2723 /* de-initialize the port. */
2724 mtip_deinit_port(dd->port);
2725
2726 /* Disable interrupts on the HBA. */
2727 writel(readl(dd->mmio + HOST_CTL) & ~HOST_IRQ_EN,
2728 dd->mmio + HOST_CTL);
2729 }
2730
2731 del_timer_sync(&dd->port->cmd_timer);
2732
2733 /* Release the IRQ. */
2734 devm_free_irq(&dd->pdev->dev, dd->pdev->irq, dd);
2735
2736 /* Stop the bottom half tasklet. */
2737 tasklet_kill(&dd->tasklet);
2738
2739 /* Free the command/command header memory. */
2740 dmam_free_coherent(&dd->pdev->dev,
2741 HW_PORT_PRIV_DMA_SZ + (ATA_SECT_SIZE * 2),
2742 dd->port->command_list,
2743 dd->port->command_list_dma);
2744 /* Free the memory allocated for the port structure. */
2745 kfree(dd->port);
2746
2747 return 0;
2748 }
2749
2750 /*
2751 * Issue a Standby Immediate command to the device.
2752 *
2753 * This function is called by the Block Layer just before the
2754 * system powers off during a shutdown.
2755 *
2756 * @dd Pointer to the driver data structure.
2757 *
2758 * return value
2759 * 0
2760 */
2761 static int mtip_hw_shutdown(struct driver_data *dd)
2762 {
2763 /*
2764 * Send standby immediate (E0h) to the drive so that it
2765 * saves its state.
2766 */
2767 mtip_standby_immediate(dd->port);
2768
2769 return 0;
2770 }
2771
2772 /*
2773 * Suspend function
2774 *
2775 * This function is called by the Block Layer just before the
2776 * system hibernates.
2777 *
2778 * @dd Pointer to the driver data structure.
2779 *
2780 * return value
2781 * 0 Suspend was successful
2782 * -EFAULT Suspend was not successful
2783 */
2784 static int mtip_hw_suspend(struct driver_data *dd)
2785 {
2786 /*
2787 * Send standby immediate (E0h) to the drive
2788 * so that it saves its state.
2789 */
2790 if (mtip_standby_immediate(dd->port) != 0) {
2791 dev_err(&dd->pdev->dev,
2792 "Failed standby-immediate command\n");
2793 return -EFAULT;
2794 }
2795
2796 /* Disable interrupts on the HBA.*/
2797 writel(readl(dd->mmio + HOST_CTL) & ~HOST_IRQ_EN,
2798 dd->mmio + HOST_CTL);
2799 mtip_deinit_port(dd->port);
2800
2801 return 0;
2802 }
2803
2804 /*
2805 * Resume function
2806 *
2807 * This function is called by the Block Layer as the
2808 * system resumes.
2809 *
2810 * @dd Pointer to the driver data structure.
2811 *
2812 * return value
2813 * 0 Resume was successful
2814 * -EFAULT Resume was not successful
2815 */
2816 static int mtip_hw_resume(struct driver_data *dd)
2817 {
2818 /* Perform any needed hardware setup steps */
2819 hba_setup(dd);
2820
2821 /* Reset the HBA */
2822 if (mtip_hba_reset(dd) != 0) {
2823 dev_err(&dd->pdev->dev,
2824 "Unable to reset the HBA\n");
2825 return -EFAULT;
2826 }
2827
2828 /*
2829 * Enable the port, DMA engine, and FIS reception specific
2830 * h/w in controller.
2831 */
2832 mtip_init_port(dd->port);
2833 mtip_start_port(dd->port);
2834
2835 /* Enable interrupts on the HBA.*/
2836 writel(readl(dd->mmio + HOST_CTL) | HOST_IRQ_EN,
2837 dd->mmio + HOST_CTL);
2838
2839 return 0;
2840 }
2841
2842 /*
2843 * Helper function for reusing disk name
2844 * upon hot insertion.
2845 */
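/*
 * For example (illustrative, mirroring the sd.c naming scheme): index 0
 * maps to "rssda", index 25 to "rssdz", and index 26 wraps to "rssdaa".
 */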
2846 static int rssd_disk_name_format(char *prefix,
2847 int index,
2848 char *buf,
2849 int buflen)
2850 {
2851 const int base = 'z' - 'a' + 1;
2852 char *begin = buf + strlen(prefix);
2853 char *end = buf + buflen;
2854 char *p;
2855 int unit;
2856
2857 p = end - 1;
2858 *p = '\0';
2859 unit = base;
2860 do {
2861 if (p == begin)
2862 return -EINVAL;
2863 *--p = 'a' + (index % unit);
2864 index = (index / unit) - 1;
2865 } while (index >= 0);
2866
2867 memmove(begin, p, end - p);
2868 memcpy(buf, prefix, strlen(prefix));
2869
2870 return 0;
2871 }
2872
2873 /*
2874 * Block layer IOCTL handler.
2875 *
2876 * @dev Pointer to the block_device structure.
2877 * @mode ignored
2878 * @cmd IOCTL command passed from the user application.
2879 * @arg Argument passed from the user application.
2880 *
2881 * return value
2882 * 0 IOCTL completed successfully.
2883 * -ENOTTY IOCTL not supported or invalid driver data
2884 * structure pointer.
2885 */
2886 static int mtip_block_ioctl(struct block_device *dev,
2887 fmode_t mode,
2888 unsigned cmd,
2889 unsigned long arg)
2890 {
2891 struct driver_data *dd = dev->bd_disk->private_data;
2892
2893 if (!capable(CAP_SYS_ADMIN))
2894 return -EACCES;
2895
2896 if (!dd)
2897 return -ENOTTY;
2898
2899 switch (cmd) {
2900 case BLKFLSBUF:
2901 return -ENOTTY;
2902 default:
2903 return mtip_hw_ioctl(dd, cmd, arg);
2904 }
2905 }
2906
2907 #ifdef CONFIG_COMPAT
2908 /*
2909 * Block layer compat IOCTL handler.
2910 *
2911 * @dev Pointer to the block_device structure.
2912 * @mode ignored
2913 * @cmd IOCTL command passed from the user application.
2914 * @arg Argument passed from the user application.
2915 *
2916 * return value
2917 * 0 IOCTL completed successfully.
2918 * -ENOTTY IOCTL not supported or invalid driver data
2919 * structure pointer.
2920 */
2921 static int mtip_block_compat_ioctl(struct block_device *dev,
2922 fmode_t mode,
2923 unsigned cmd,
2924 unsigned long arg)
2925 {
2926 struct driver_data *dd = dev->bd_disk->private_data;
2927
2928 if (!capable(CAP_SYS_ADMIN))
2929 return -EACCES;
2930
2931 if (!dd)
2932 return -ENOTTY;
2933
2934 switch (cmd) {
2935 case BLKFLSBUF:
2936 return -ENOTTY;
2937 case HDIO_DRIVE_TASKFILE: {
2938 struct mtip_compat_ide_task_request_s __user *compat_req_task;
2939 ide_task_request_t req_task;
2940 int compat_tasksize, outtotal, ret;
2941
2942 compat_tasksize =
2943 sizeof(struct mtip_compat_ide_task_request_s);
2944
2945 compat_req_task =
2946 (struct mtip_compat_ide_task_request_s __user *) arg;
2947
2948 if (copy_from_user(&req_task, (void __user *) arg,
2949 compat_tasksize - (2 * sizeof(compat_long_t))))
2950 return -EFAULT;
2951
2952 if (get_user(req_task.out_size, &compat_req_task->out_size))
2953 return -EFAULT;
2954
2955 if (get_user(req_task.in_size, &compat_req_task->in_size))
2956 return -EFAULT;
2957
2958 outtotal = sizeof(struct mtip_compat_ide_task_request_s);
2959
2960 ret = exec_drive_taskfile(dd, (void __user *) arg,
2961 &req_task, outtotal);
2962
2963 if (copy_to_user((void __user *) arg, &req_task,
2964 compat_tasksize -
2965 (2 * sizeof(compat_long_t))))
2966 return -EFAULT;
2967
2968 if (put_user(req_task.out_size, &compat_req_task->out_size))
2969 return -EFAULT;
2970
2971 if (put_user(req_task.in_size, &compat_req_task->in_size))
2972 return -EFAULT;
2973
2974 return ret;
2975 }
2976 default:
2977 return mtip_hw_ioctl(dd, cmd, arg);
2978 }
2979 }
2980 #endif
2981
2982 /*
2983 * Obtain the geometry of the device.
2984 *
2985 * You may think that this function is obsolete, but some applications,
2986 * fdisk for example, still use CHS values. This function describes the
2987 * device as having 224 heads and 56 sectors per cylinder. These values are
2988 * chosen so that each cylinder is aligned on a 4KB boundary. Since a
2989 * partition is described in terms of a start and end cylinder, this means
2990 * that each partition is also 4KB aligned. Non-aligned partitions adversely
2991 * affect performance.
2992 *
2993 * @dev Pointer to the block_device structure.
2994 * @geo Pointer to a hd_geometry structure.
2995 *
2996 * return value
2997 * 0 Operation completed successfully.
2998 * -ENOTTY An error occurred while reading the drive capacity.
2999 */
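/*
 * Worked example: 224 heads * 56 sectors = 12544 sectors per cylinder,
 * i.e. 12544 * 512 bytes = 1568 * 4096 bytes, so every cylinder (and
 * therefore every cylinder-aligned partition) starts on a 4KB boundary.
 */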
3000 static int mtip_block_getgeo(struct block_device *dev,
3001 struct hd_geometry *geo)
3002 {
3003 struct driver_data *dd = dev->bd_disk->private_data;
3004 sector_t capacity;
3005
3006 if (!dd)
3007 return -ENOTTY;
3008
3009 if (!(mtip_hw_get_capacity(dd, &capacity))) {
3010 dev_warn(&dd->pdev->dev,
3011 "Could not get drive capacity.\n");
3012 return -ENOTTY;
3013 }
3014
3015 geo->heads = 224;
3016 geo->sectors = 56;
3017 sector_div(capacity, (geo->heads * geo->sectors));
3018 geo->cylinders = capacity;
3019 return 0;
3020 }
3021
3022 /*
3023 * Block device operation function.
3024 *
3025 * This structure contains pointers to the functions required by the block
3026 * layer.
3027 */
3028 static const struct block_device_operations mtip_block_ops = {
3029 .ioctl = mtip_block_ioctl,
3030 #ifdef CONFIG_COMPAT
3031 .compat_ioctl = mtip_block_compat_ioctl,
3032 #endif
3033 .getgeo = mtip_block_getgeo,
3034 .owner = THIS_MODULE
3035 };
3036
3037 /*
3038 * Block layer make request function.
3039 *
3040 * This function is called by the kernel to process a BIO for
3041 * the P320 device.
3042 *
3043 * @queue Pointer to the request queue. Unused other than to obtain
3044 * the driver data structure.
3045 * @bio Pointer to the BIO.
3046 *
3047 */
3048 static void mtip_make_request(struct request_queue *queue, struct bio *bio)
3049 {
3050 struct driver_data *dd = queue->queuedata;
3051 struct scatterlist *sg;
3052 struct bio_vec *bvec;
3053 int nents = 0;
3054 int tag = 0;
3055
3056 if (unlikely(!bio_has_data(bio))) {
3057 blk_queue_flush(queue, 0);
3058 bio_endio(bio, 0);
3059 return;
3060 }
3061
3062 sg = mtip_hw_get_scatterlist(dd, &tag);
3063 if (likely(sg != NULL)) {
3064 blk_queue_bounce(queue, &bio);
3065
3066 if (unlikely((bio)->bi_vcnt > MTIP_MAX_SG)) {
3067 dev_warn(&dd->pdev->dev,
3068 "Maximum number of SGL entries exceeded");
3069 bio_io_error(bio);
3070 mtip_hw_release_scatterlist(dd, tag);
3071 return;
3072 }
3073
3074 /* Create the scatter list for this bio. */
3075 bio_for_each_segment(bvec, bio, nents) {
3076 sg_set_page(&sg[nents],
3077 bvec->bv_page,
3078 bvec->bv_len,
3079 bvec->bv_offset);
3080 }
3081
3082 /* Issue the read/write. */
3083 mtip_hw_submit_io(dd,
3084 bio->bi_sector,
3085 bio_sectors(bio),
3086 nents,
3087 tag,
3088 bio_endio,
3089 bio,
3090 bio->bi_rw & REQ_FUA,
3091 bio_data_dir(bio));
3092 } else
3093 bio_io_error(bio);
3094 }
3095
3096 /*
3097 * Block layer initialization function.
3098 *
3099 * This function is called once by the PCI layer for each P320
3100 * device that is connected to the system.
3101 *
3102 * @dd Pointer to the driver data structure.
3103 *
3104 * return value
3105 * 0 on success else an error code.
3106 */
3107 static int mtip_block_initialize(struct driver_data *dd)
3108 {
3109 int rv = 0, wait_for_rebuild = 0;
3110 sector_t capacity;
3111 unsigned int index = 0;
3112 struct kobject *kobj;
3113 unsigned char thd_name[16];
3114
3115 if (dd->disk)
3116 goto skip_create_disk; /* hw init done, before rebuild */
3117
3118 /* Initialize the protocol layer. */
3119 wait_for_rebuild = mtip_hw_init(dd);
3120 if (wait_for_rebuild < 0) {
3121 dev_err(&dd->pdev->dev,
3122 "Protocol layer initialization failed\n");
3123 rv = -EINVAL;
3124 goto protocol_init_error;
3125 }
3126
3127 dd->disk = alloc_disk(MTIP_MAX_MINORS);
3128 if (dd->disk == NULL) {
3129 dev_err(&dd->pdev->dev,
3130 "Unable to allocate gendisk structure\n");
3131 rv = -EINVAL;
3132 goto alloc_disk_error;
3133 }
3134
3135 /* Generate the disk name, implemented same as in sd.c */
3136 do {
3137 if (!ida_pre_get(&rssd_index_ida, GFP_KERNEL))
3138 goto ida_get_error;
3139
3140 spin_lock(&rssd_index_lock);
3141 rv = ida_get_new(&rssd_index_ida, &index);
3142 spin_unlock(&rssd_index_lock);
3143 } while (rv == -EAGAIN);
3144
3145 if (rv)
3146 goto ida_get_error;
3147
3148 rv = rssd_disk_name_format("rssd",
3149 index,
3150 dd->disk->disk_name,
3151 DISK_NAME_LEN);
3152 if (rv)
3153 goto disk_index_error;
3154
3155 dd->disk->driverfs_dev = &dd->pdev->dev;
3156 dd->disk->major = dd->major;
3157 dd->disk->first_minor = dd->instance * MTIP_MAX_MINORS;
3158 dd->disk->fops = &mtip_block_ops;
3159 dd->disk->private_data = dd;
3160 dd->index = index;
3161
3162 /*
3163 * if rebuild pending, start the service thread, and delay the block
3164 * queue creation and add_disk()
3165 */
3166 if (wait_for_rebuild == MTIP_FTL_REBUILD_MAGIC)
3167 goto start_service_thread;
3168
3169 skip_create_disk:
3170 /* Allocate the request queue. */
3171 dd->queue = blk_alloc_queue(GFP_KERNEL);
3172 if (dd->queue == NULL) {
3173 dev_err(&dd->pdev->dev,
3174 "Unable to allocate request queue\n");
3175 rv = -ENOMEM;
3176 goto block_queue_alloc_init_error;
3177 }
3178
3179 /* Attach our request function to the request queue. */
3180 blk_queue_make_request(dd->queue, mtip_make_request);
3181
3182 dd->disk->queue = dd->queue;
3183 dd->queue->queuedata = dd;
3184
3185 /* Set device limits. */
3186 set_bit(QUEUE_FLAG_NONROT, &dd->queue->queue_flags);
3187 blk_queue_max_segments(dd->queue, MTIP_MAX_SG);
3188 blk_queue_physical_block_size(dd->queue, 4096);
3189 blk_queue_io_min(dd->queue, 4096);
3190 blk_queue_flush(dd->queue, 0);
3191
3192 /* Set the capacity of the device in 512 byte sectors. */
3193 if (!(mtip_hw_get_capacity(dd, &capacity))) {
3194 dev_warn(&dd->pdev->dev,
3195 "Could not read drive capacity\n");
3196 rv = -EIO;
3197 goto read_capacity_error;
3198 }
3199 set_capacity(dd->disk, capacity);
3200
3201 /* Enable the block device and add it to /dev */
3202 add_disk(dd->disk);
3203
3204 /*
3205 * Now that the disk is active, initialize any sysfs attributes
3206 * managed by the protocol layer.
3207 */
3208 kobj = kobject_get(&disk_to_dev(dd->disk)->kobj);
3209 if (kobj) {
3210 mtip_hw_sysfs_init(dd, kobj);
3211 kobject_put(kobj);
3212 }
3213
3214 if (dd->mtip_svc_handler)
3215 return rv; /* service thread created for handling rebuild */
3216
3217 start_service_thread:
3218 sprintf(thd_name, "mtip_svc_thd_%02d", index);
3219
3220 dd->mtip_svc_handler = kthread_run(mtip_service_thread,
3221 dd, thd_name);
3222
3223 if (IS_ERR(dd->mtip_svc_handler)) {
3224 printk(KERN_ERR "mtip32xx: service thread failed to start\n");
3225 dd->mtip_svc_handler = NULL;
3226 rv = -EFAULT;
3227 goto kthread_run_error;
3228 }
3229
3230 return rv;
3231
3232 kthread_run_error:
3233 /* Delete our gendisk. This also removes the device from /dev */
3234 del_gendisk(dd->disk);
3235
3236 read_capacity_error:
3237 blk_cleanup_queue(dd->queue);
3238
3239 block_queue_alloc_init_error:
3240 disk_index_error:
3241 spin_lock(&rssd_index_lock);
3242 ida_remove(&rssd_index_ida, index);
3243 spin_unlock(&rssd_index_lock);
3244
3245 ida_get_error:
3246 put_disk(dd->disk);
3247
3248 alloc_disk_error:
3249 mtip_hw_exit(dd); /* De-initialize the protocol layer. */
3250
3251 protocol_init_error:
3252 return rv;
3253 }
3254
3255 /*
3256 * Block layer deinitialization function.
3257 *
3258 * Called by the PCI layer as each P320 device is removed.
3259 *
3260 * @dd Pointer to the driver data structure.
3261 *
3262 * return value
3263 * 0
3264 */
3265 static int mtip_block_remove(struct driver_data *dd)
3266 {
3267 struct kobject *kobj;
3268
3269 if (dd->mtip_svc_handler) {
3270 set_bit(MTIP_FLAG_SVC_THD_SHOULD_STOP_BIT, &dd->port->flags);
3271 wake_up_interruptible(&dd->port->svc_wait);
3272 kthread_stop(dd->mtip_svc_handler);
3273 }
3274
3275 /* Clean up the sysfs attributes managed by the protocol layer. */
3276 kobj = kobject_get(&disk_to_dev(dd->disk)->kobj);
3277 if (kobj) {
3278 mtip_hw_sysfs_exit(dd, kobj);
3279 kobject_put(kobj);
3280 }
3281
3282 /*
3283 * Delete our gendisk structure. This also removes the device
3284 * from /dev
3285 */
3286 del_gendisk(dd->disk);
3287 blk_cleanup_queue(dd->queue);
3288 dd->disk = NULL;
3289 dd->queue = NULL;
3290
3291 /* De-initialize the protocol layer. */
3292 mtip_hw_exit(dd);
3293
3294 return 0;
3295 }
3296
3297 /*
3298 * Function called by the PCI layer just before the
3299 * machine shuts down.
3300 *
3301 * If a protocol layer shutdown function is present it will be called
3302 * by this function.
3303 *
3304 * @dd Pointer to the driver data structure.
3305 *
3306 * return value
3307 * 0
3308 */
3309 static int mtip_block_shutdown(struct driver_data *dd)
3310 {
3311 dev_info(&dd->pdev->dev,
3312 "Shutting down %s ...\n", dd->disk->disk_name);
3313
3314 /* Delete our gendisk structure, and cleanup the blk queue. */
3315 del_gendisk(dd->disk);
3316 blk_cleanup_queue(dd->queue);
3317 dd->disk = NULL;
3318 dd->queue = NULL;
3319
3320 mtip_hw_shutdown(dd);
3321 return 0;
3322 }
3323
3324 static int mtip_block_suspend(struct driver_data *dd)
3325 {
3326 dev_info(&dd->pdev->dev,
3327 "Suspending %s ...\n", dd->disk->disk_name);
3328 mtip_hw_suspend(dd);
3329 return 0;
3330 }
3331
3332 static int mtip_block_resume(struct driver_data *dd)
3333 {
3334 dev_info(&dd->pdev->dev, "Resuming %s ...\n",
3335 dd->disk->disk_name);
3336 mtip_hw_resume(dd);
3337 return 0;
3338 }
3339
3340 /*
3341 * Called for each supported PCI device detected.
3342 *
3343 * This function allocates the private data structure, enables the
3344 * PCI device and then calls the block layer initialization function.
3345 *
3346 * return value
3347 * 0 on success else an error code.
3348 */
3349 static int mtip_pci_probe(struct pci_dev *pdev,
3350 const struct pci_device_id *ent)
3351 {
3352 int rv = 0;
3353 struct driver_data *dd = NULL;
3354
3355 /* Allocate memory for this device's private data. */
3356 dd = kzalloc(sizeof(struct driver_data), GFP_KERNEL);
3357 if (dd == NULL) {
3358 dev_err(&pdev->dev,
3359 "Unable to allocate memory for driver data\n");
3360 return -ENOMEM;
3361 }
3362
3363 /* Set the atomic variable as 1 in case of SRSI */
3364 atomic_set(&dd->drv_cleanup_done, true);
3365
3366 atomic_set(&dd->resumeflag, false);
3367
3368 /* Attach the private data to this PCI device. */
3369 pci_set_drvdata(pdev, dd);
3370
3371 rv = pcim_enable_device(pdev);
3372 if (rv < 0) {
3373 dev_err(&pdev->dev, "Unable to enable device\n");
3374 goto iomap_err;
3375 }
3376
3377 /* Map BAR5 to memory. */
3378 rv = pcim_iomap_regions(pdev, 1 << MTIP_ABAR, MTIP_DRV_NAME);
3379 if (rv < 0) {
3380 dev_err(&pdev->dev, "Unable to map regions\n");
3381 goto iomap_err;
3382 }
3383
3384 if (!pci_set_dma_mask(pdev, DMA_BIT_MASK(64))) {
3385 rv = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
3386
3387 if (rv) {
3388 rv = pci_set_consistent_dma_mask(pdev,
3389 DMA_BIT_MASK(32));
3390 if (rv) {
3391 dev_warn(&pdev->dev,
3392 "64-bit DMA enable failed\n");
3393 goto setmask_err;
3394 }
3395 }
3396 }
3397
3398 pci_set_master(pdev);
3399
3400 if (pci_enable_msi(pdev)) {
3401 dev_warn(&pdev->dev,
3402 "Unable to enable MSI interrupt.\n");
3403 goto block_initialize_err;
3404 }
3405
3406 /* Copy the info we may need later into the private data structure. */
3407 dd->major = mtip_major;
3408 dd->instance = instance;
3409 dd->pdev = pdev;
3410
3411 /* Initialize the block layer. */
3412 rv = mtip_block_initialize(dd);
3413 if (rv < 0) {
3414 dev_err(&pdev->dev,
3415 "Unable to initialize block layer\n");
3416 goto block_initialize_err;
3417 }
3418
3419 /*
3420 * Increment the instance count so that each device has a unique
3421 * instance number.
3422 */
3423 instance++;
3424
3425 goto done;
3426
3427 block_initialize_err:
3428 pci_disable_msi(pdev);
3429
3430 setmask_err:
3431 pcim_iounmap_regions(pdev, 1 << MTIP_ABAR);
3432
3433 iomap_err:
3434 kfree(dd);
3435 pci_set_drvdata(pdev, NULL);
3436 return rv;
3437 done:
3438 /* Set the atomic variable as 0 in case of SRSI */
3439 atomic_set(&dd->drv_cleanup_done, false);
3440
3441 return rv;
3442 }
3443
3444 /*
3445 * Called for each probed device when the device is removed or the
3446 * driver is unloaded.
3447 *
3448 * return value
3449 * None
3450 */
3451 static void mtip_pci_remove(struct pci_dev *pdev)
3452 {
3453 struct driver_data *dd = pci_get_drvdata(pdev);
3454 int counter = 0;
3455
3456 if (mtip_check_surprise_removal(pdev)) {
3457 while (atomic_read(&dd->drv_cleanup_done) == false) {
3458 counter++;
3459 msleep(20);
3460 if (counter == 10) {
3461 /* Cleanup the outstanding commands */
3462 mtip_command_cleanup(dd);
3463 break;
3464 }
3465 }
3466 }
3467 /* Set the atomic variable as 1 in case of SRSI */
3468 atomic_set(&dd->drv_cleanup_done, true);
3469
3470 /* Clean up the block layer. */
3471 mtip_block_remove(dd);
3472
3473 pci_disable_msi(pdev);
3474
3475 kfree(dd);
3476 pcim_iounmap_regions(pdev, 1 << MTIP_ABAR);
3477 }
3478
3479 /*
3480 * Called for each probed device when the device is suspended.
3481 *
3482 * return value
3483 * 0 Success
3484 * <0 Error
3485 */
3486 static int mtip_pci_suspend(struct pci_dev *pdev, pm_message_t mesg)
3487 {
3488 int rv = 0;
3489 struct driver_data *dd = pci_get_drvdata(pdev);
3490
3491 if (!dd) {
3492 dev_err(&pdev->dev,
3493 "Driver private datastructure is NULL\n");
3494 return -EFAULT;
3495 }
3496
3497 atomic_set(&dd->resumeflag, true);
3498
3499 /* Disable ports & interrupts then send standby immediate */
3500 rv = mtip_block_suspend(dd);
3501 if (rv < 0) {
3502 dev_err(&pdev->dev,
3503 "Failed to suspend controller\n");
3504 return rv;
3505 }
3506
3507 /*
3508 * Save the pci config space to pdev structure &
3509 * disable the device
3510 */
3511 pci_save_state(pdev);
3512 pci_disable_device(pdev);
3513
3514 /* Move to Low power state*/
3515 pci_set_power_state(pdev, PCI_D3hot);
3516
3517 return rv;
3518 }
3519
3520 /*
3521 * Called for each probed device when the device is resumed.
3522 *
3523 * return value
3524 * 0 Success
3525 * <0 Error
3526 */
3527 static int mtip_pci_resume(struct pci_dev *pdev)
3528 {
3529 int rv = 0;
3530 struct driver_data *dd;
3531
3532 dd = pci_get_drvdata(pdev);
3533 if (!dd) {
3534 dev_err(&pdev->dev,
3535 "Driver private datastructure is NULL\n");
3536 return -EFAULT;
3537 }
3538
3539 /* Move the device to active State */
3540 pci_set_power_state(pdev, PCI_D0);
3541
3542 /* Restore PCI configuration space */
3543 pci_restore_state(pdev);
3544
3545 /* Enable the PCI device*/
3546 rv = pcim_enable_device(pdev);
3547 if (rv < 0) {
3548 dev_err(&pdev->dev,
3549 "Failed to enable card during resume\n");
3550 goto err;
3551 }
3552 pci_set_master(pdev);
3553
3554 /*
3555 * Calls hbaReset, initPort, & startPort function
3556 * then enables interrupts
3557 */
3558 rv = mtip_block_resume(dd);
3559 if (rv < 0)
3560 dev_err(&pdev->dev, "Unable to resume\n");
3561
3562 err:
3563 atomic_set(&dd->resumeflag, false);
3564
3565 return rv;
3566 }
3567
3568 /*
3569 * Shutdown routine
3570 *
3571 * return value
3572 * None
3573 */
3574 static void mtip_pci_shutdown(struct pci_dev *pdev)
3575 {
3576 struct driver_data *dd = pci_get_drvdata(pdev);
3577 if (dd)
3578 mtip_block_shutdown(dd);
3579 }
3580
3581 /* Table of device ids supported by this driver. */
3582 static DEFINE_PCI_DEVICE_TABLE(mtip_pci_tbl) = {
3583 { PCI_DEVICE(PCI_VENDOR_ID_MICRON, P320_DEVICE_ID) },
3584 { 0 }
3585 };
3586
3587 /* Structure that describes the PCI driver functions. */
3588 static struct pci_driver mtip_pci_driver = {
3589 .name = MTIP_DRV_NAME,
3590 .id_table = mtip_pci_tbl,
3591 .probe = mtip_pci_probe,
3592 .remove = mtip_pci_remove,
3593 .suspend = mtip_pci_suspend,
3594 .resume = mtip_pci_resume,
3595 .shutdown = mtip_pci_shutdown,
3596 };
3597
3598 MODULE_DEVICE_TABLE(pci, mtip_pci_tbl);
3599
3600 /*
3601 * Module initialization function.
3602 *
3603 * Called once when the module is loaded. This function allocates a major
3604 * block device number to the Cyclone devices and registers the PCI layer
3605 * of the driver.
3606 *
3607 * Return value
3608 * 0 on success else error code.
3609 */
3610 static int __init mtip_init(void)
3611 {
3612 printk(KERN_INFO MTIP_DRV_NAME " Version " MTIP_DRV_VERSION "\n");
3613
3614 /* Allocate a major block device number to use with this driver. */
3615 mtip_major = register_blkdev(0, MTIP_DRV_NAME);
3616 if (mtip_major < 0) {
3617 printk(KERN_ERR "Unable to register block device (%d)\n",
3618 mtip_major);
3619 return -EBUSY;
3620 }
3621
3622 /* Register our PCI operations. */
3623 return pci_register_driver(&mtip_pci_driver);
3624 }
3625
3626 /*
3627 * Module de-initialization function.
3628 *
3629 * Called once when the module is unloaded. This function deallocates
3630 * the major block device number allocated by mtip_init() and
3631 * unregisters the PCI layer of the driver.
3632 *
3633 * Return value
3634 * none
3635 */
3636 static void __exit mtip_exit(void)
3637 {
3638 /* Release the allocated major block device number. */
3639 unregister_blkdev(mtip_major, MTIP_DRV_NAME);
3640
3641 /* Unregister the PCI driver. */
3642 pci_unregister_driver(&mtip_pci_driver);
3643 }
3644
3645 MODULE_AUTHOR("Micron Technology, Inc");
3646 MODULE_DESCRIPTION("Micron RealSSD PCIe Block Driver");
3647 MODULE_LICENSE("GPL");
3648 MODULE_VERSION(MTIP_DRV_VERSION);
3649
3650 module_init(mtip_init);
3651 module_exit(mtip_exit);
3652
drivers/block/mtip32xx/mtip32xx.h
File was created 1 /*
2 * mtip32xx.h - Header file for the P320 SSD Block Driver
3 * Copyright (C) 2011 Micron Technology, Inc.
4 *
5 * Portions of this code were derived from works subjected to the
6 * following copyright:
7 * Copyright (C) 2009 Integrated Device Technology, Inc.
8 *
9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License as published by
11 * the Free Software Foundation; either version 2 of the License, or
12 * (at your option) any later version.
13 *
14 * This program is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 * GNU General Public License for more details.
18 *
19 */
20
21 #ifndef __MTIP32XX_H__
22 #define __MTIP32XX_H__
23
24 #include <linux/spinlock.h>
25 #include <linux/rwsem.h>
26 #include <linux/ata.h>
27 #include <linux/interrupt.h>
28 #include <linux/genhd.h>
29 #include <linux/version.h>
30
31 /* Offset of Subsystem Device ID in PCI configuration space */
32 #define PCI_SUBSYSTEM_DEVICEID 0x2E
33
34 /* Offset of Device Control register in PCIe extended capabilities space */
35 #define PCIE_CONFIG_EXT_DEVICE_CONTROL_OFFSET 0x48
36
37 /* # of times to retry timed out IOs */
38 #define MTIP_MAX_RETRIES 5
39
40 /* Various timeout values in ms */
41 #define MTIP_NCQ_COMMAND_TIMEOUT_MS 5000
42 #define MTIP_IOCTL_COMMAND_TIMEOUT_MS 5000
43 #define MTIP_INTERNAL_COMMAND_TIMEOUT_MS 5000
44
45 /* check for timeouts every 500ms */
46 #define MTIP_TIMEOUT_CHECK_PERIOD 500
47
48 /* ftl rebuild */
49 #define MTIP_FTL_REBUILD_OFFSET 142
50 #define MTIP_FTL_REBUILD_MAGIC 0xED51
51 #define MTIP_FTL_REBUILD_TIMEOUT_MS 2400000
52
53 /* Macro to extract the tag bit number from a tag value. */
54 #define MTIP_TAG_BIT(tag) (tag & 0x1F)
55
56 /*
57 * Macro to extract the tag index from a tag value. The index
58 * is used to access the correct s_active/Command Issue register based
59 * on the tag value.
60 */
61 #define MTIP_TAG_INDEX(tag) (tag >> 5)
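/*
 * For example, tag 37 gives MTIP_TAG_INDEX(37) == 1 and
 * MTIP_TAG_BIT(37) == 5, i.e. bit 5 of the second s_active/Command
 * Issue register.
 */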
62
63 /*
64 * Maximum number of scatter gather entries
65 * a single command may have.
66 */
67 #define MTIP_MAX_SG 128
68
69 /*
70 * Maximum number of slot groups (Command Issue & s_active registers)
71 * NOTE: This is the driver maximum; check dd->slot_groups for actual value.
72 */
73 #define MTIP_MAX_SLOT_GROUPS 8
74
75 /* Internal command tag. */
76 #define MTIP_TAG_INTERNAL 0
77
78 /* Micron Vendor ID & P320x SSD Device ID */
79 #define PCI_VENDOR_ID_MICRON 0x1344
80 #define P320_DEVICE_ID 0x5150
81
82 /* Driver name and version strings */
83 #define MTIP_DRV_NAME "mtip32xx"
84 #define MTIP_DRV_VERSION "1.2.6os3"
85
86 /* Maximum number of minor device numbers per device. */
87 #define MTIP_MAX_MINORS 16
88
89 /* Maximum number of supported command slots. */
90 #define MTIP_MAX_COMMAND_SLOTS (MTIP_MAX_SLOT_GROUPS * 32)
91
92 /*
93 * Per-tag bitfield size in longs.
94 * Linux bit manipulation functions
95 * (e.g. test_and_set_bit, find_next_zero_bit)
96 * manipulate memory in longs, so we try to make the math work.
97 * take the slot groups and find the number of longs, rounding up.
98 * Careful! i386 and x86_64 use different size longs!
99 */
100 #define U32_PER_LONG (sizeof(long) / sizeof(u32))
101 #define SLOTBITS_IN_LONGS ((MTIP_MAX_SLOT_GROUPS + \
102 (U32_PER_LONG-1))/U32_PER_LONG)
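/*
 * For example, with MTIP_MAX_SLOT_GROUPS == 8: on x86_64
 * (U32_PER_LONG == 2) this evaluates to 4 longs; on i386
 * (U32_PER_LONG == 1) it is 8 longs. Either way that is 256 bits,
 * one bit per possible command slot.
 */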
103
104 /* BAR number used to access the HBA registers. */
105 #define MTIP_ABAR 5
106
107 /* Forced Unit Access Bit */
108 #define FUA_BIT 0x80
109
110 #ifdef DEBUG
111 #define dbg_printk(format, arg...) \
112 printk(pr_fmt(format), ##arg);
113 #else
114 #define dbg_printk(format, arg...)
115 #endif
116
117 #define __force_bit2int (unsigned int __force)
118
119 /* below are bit numbers in 'flags' defined in mtip_port */
120 #define MTIP_FLAG_IC_ACTIVE_BIT 0
121 #define MTIP_FLAG_EH_ACTIVE_BIT 1
122 #define MTIP_FLAG_SVC_THD_ACTIVE_BIT 2
123 #define MTIP_FLAG_ISSUE_CMDS_BIT 4
124 #define MTIP_FLAG_REBUILD_BIT 5
125 #define MTIP_FLAG_SVC_THD_SHOULD_STOP_BIT 8
126
127 /* Register Frame Information Structure (FIS), host to device. */
128 struct host_to_dev_fis {
129 /*
130 * FIS type.
131 * - 27h Register FIS, host to device.
132 * - 34h Register FIS, device to host.
133 * - 39h DMA Activate FIS, device to host.
134 * - 41h DMA Setup FIS, bi-directional.
135 * - 46h Data FIS, bi-directional.
136 * - 58h BIST Activate FIS, bi-directional.
137 * - 5Fh PIO Setup FIS, device to host.
138 * - A1h Set Device Bits FIS, device to host.
139 */
140 unsigned char type;
141 unsigned char opts;
142 unsigned char command;
143 unsigned char features;
144
145 union {
146 unsigned char lba_low;
147 unsigned char sector;
148 };
149 union {
150 unsigned char lba_mid;
151 unsigned char cyl_low;
152 };
153 union {
154 unsigned char lba_hi;
155 unsigned char cyl_hi;
156 };
157 union {
158 unsigned char device;
159 unsigned char head;
160 };
161
162 union {
163 unsigned char lba_low_ex;
164 unsigned char sector_ex;
165 };
166 union {
167 unsigned char lba_mid_ex;
168 unsigned char cyl_low_ex;
169 };
170 union {
171 unsigned char lba_hi_ex;
172 unsigned char cyl_hi_ex;
173 };
174 unsigned char features_ex;
175
176 unsigned char sect_count;
177 unsigned char sect_cnt_ex;
178 unsigned char res2;
179 unsigned char control;
180
181 unsigned int res3;
182 };
183
184 /* Command header structure. */
185 struct mtip_cmd_hdr {
186 /*
187 * Command options.
188 * - Bits 31:16 Number of PRD entries.
189 * - Bits 15:8 Unused in this implementation.
190 * - Bit 7 Prefetch bit, informs the drive to prefetch PRD entries.
191 * - Bit 6 Write bit, should be set when writing data to the device.
192 * - Bit 5 Unused in this implementation.
193 * - Bits 4:0 Length of the command FIS in DWords (DWord = 4 bytes).
194 */
195 unsigned int opts;
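/*
 * Illustrative example: a 5 DWord command FIS describing a write that
 * uses 3 PRD entries would set opts = (3 << 16) | (1 << 6) | 5.
 */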
196 /* This field is unused when using NCQ. */
197 union {
198 unsigned int byte_count;
199 unsigned int status;
200 };
201 /*
202 * Lower 32 bits of the command table address associated with this
203 * header. The command table addresses must be 128 byte aligned.
204 */
205 unsigned int ctba;
206 /*
207 * If 64 bit addressing is used this field is the upper 32 bits
208 * of the command table address associated with this command.
209 */
210 unsigned int ctbau;
211 /* Reserved and unused. */
212 unsigned int res[4];
213 };
214
215 /* Command scatter gather structure (PRD). */
216 struct mtip_cmd_sg {
217 /*
218 * Low 32 bits of the data buffer address. For P320 this
219 * address must be 8 byte aligned signified by bits 2:0 being
220 * set to 0.
221 */
222 unsigned int dba;
223 /*
224 * When 64 bit addressing is used this field is the upper
225 * 32 bits of the data buffer address.
226 */
227 unsigned int dba_upper;
228 /* Unused. */
229 unsigned int reserved;
230 /*
231 * Bit 31: interrupt when this data block has been transferred.
232 * Bits 30..22: reserved
233 * Bits 21..0: byte count (minus 1). For P320 the byte count must be
234 * 8 byte aligned signified by bits 2:0 being set to 1.
235 */
236 unsigned int info;
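/*
 * Illustrative example: an 8192 byte segment stores 8191 (0x1FFF) in
 * bits 21..0; setting bit 31 as well requests an interrupt when this
 * data block has been transferred.
 */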
237 };
238 struct mtip_port;
239
240 /* Structure used to describe a command. */
241 struct mtip_cmd {
242
243 struct mtip_cmd_hdr *command_header; /* ptr to command header entry */
244
245 dma_addr_t command_header_dma; /* corresponding physical address */
246
247 void *command; /* ptr to command table entry */
248
249 dma_addr_t command_dma; /* corresponding physical address */
250
251 void *comp_data; /* data passed to completion function comp_func() */
252 /*
253 * Completion function called by the ISR upon completion of
254 * a command.
255 */
256 void (*comp_func)(struct mtip_port *port,
257 int tag,
258 void *data,
259 int status);
260 /* Additional callback function that may be called by comp_func() */
261 void (*async_callback)(void *data, int status);
262
263 void *async_data; /* Addl. data passed to async_callback() */
264
265 int scatter_ents; /* Number of scatter list entries used */
266
267 struct scatterlist sg[MTIP_MAX_SG]; /* Scatter list entries */
268
269 int retries; /* The number of retries left for this command. */
270
271 int direction; /* Data transfer direction */
272
273 unsigned long comp_time; /* command completion time, in jiffies */
274
275 atomic_t active; /* indicates whether this command has been sent to the drive. */
276 };
277
278 /* Structure used to describe a port. */
279 struct mtip_port {
280 /* Pointer back to the driver data for this port. */
281 struct driver_data *dd;
282 /*
283 * Used to determine if the data pointed to by the
284 * identify field is valid.
285 */
286 unsigned long identify_valid;
287 /* Base address of the memory mapped IO for the port. */
288 void __iomem *mmio;
289 /* Array of pointers to the memory mapped s_active registers. */
290 void __iomem *s_active[MTIP_MAX_SLOT_GROUPS];
291 /* Array of pointers to the memory mapped completed registers. */
292 void __iomem *completed[MTIP_MAX_SLOT_GROUPS];
293 /* Array of pointers to the memory mapped Command Issue registers. */
294 void __iomem *cmd_issue[MTIP_MAX_SLOT_GROUPS];
295 /*
296 * Pointer to the beginning of the command header memory as used
297 * by the driver.
298 */
299 void *command_list;
300 /*
301 * Pointer to the beginning of the command header memory as used
302 * by the DMA.
303 */
304 dma_addr_t command_list_dma;
305 /*
306 * Pointer to the beginning of the RX FIS memory as used
307 * by the driver.
308 */
309 void *rxfis;
310 /*
311 * Pointer to the beginning of the RX FIS memory as used
312 * by the DMA.
313 */
314 dma_addr_t rxfis_dma;
315 /*
316 * Pointer to the beginning of the command table memory as used
317 * by the driver.
318 */
319 void *command_table;
320 /*
321 * Pointer to the beginning of the command table memory as used
322 * by the DMA.
323 */
324 dma_addr_t command_tbl_dma;
325 /*
326 * Pointer to the beginning of the identify data memory as used
327 * by the driver.
328 */
329 u16 *identify;
330 /*
331 * Pointer to the beginning of the identify data memory as used
332 * by the DMA.
333 */
334 dma_addr_t identify_dma;
335 /*
336 * Pointer to the beginning of a sector buffer that is used
337 * by the driver when issuing internal commands.
338 */
339 u16 *sector_buffer;
340 /*
341 * Pointer to the beginning of a sector buffer that is used
342 * by the DMA when the driver issues internal commands.
343 */
344 dma_addr_t sector_buffer_dma;
345 /*
346 * Bit significant, used to determine if a command slot has
347 * been allocated. i.e. the slot is in use. Bits are cleared
348 * when the command slot and all associated data structures
349 * are no longer needed.
350 */
351 unsigned long allocated[SLOTBITS_IN_LONGS];
352 /*
353 * used to queue commands when an internal command is in progress
354 * or error handling is active
355 */
356 unsigned long cmds_to_issue[SLOTBITS_IN_LONGS];
357 /*
358 * Array of command slots. Structure includes pointers to the
359 * command header and command table, and completion function and data
360 * pointers.
361 */
362 struct mtip_cmd commands[MTIP_MAX_COMMAND_SLOTS];
363 /* Used by mtip_service_thread to wait for an event */
364 wait_queue_head_t svc_wait;
365 /*
366 * indicates the state of the port. Also, helps the service thread
367 * to determine its action on wake up.
368 */
369 unsigned long flags;
370 /*
371 * Timer used to complete commands that have been active for too long.
372 */
373 struct timer_list cmd_timer;
374 /*
375 * Semaphore used to block threads if there are no
376 * command slots available.
377 */
378 struct semaphore cmd_slot;
379 /* Spinlock for working around command-issue bug. */
380 spinlock_t cmd_issue_lock;
381 };
382
383 /*
384 * Driver private data structure.
385 *
386 * One structure is allocated per probed device.
387 */
388 struct driver_data {
389 void __iomem *mmio; /* Base address of the HBA registers. */
390
391 int major; /* Major device number. */
392
393 int instance; /* Instance number. First device probed is 0, ... */
394
395 struct gendisk *disk; /* Pointer to our gendisk structure. */
396
397 struct pci_dev *pdev; /* Pointer to the PCI device structure. */
398
399 struct request_queue *queue; /* Our request queue. */
400
401 struct mtip_port *port; /* Pointer to the port data structure. */
402
403 /* Tasklet used to process the bottom half of the ISR. */
404 struct tasklet_struct tasklet;
405
406 unsigned product_type; /* magic value declaring the product type */
407
408 unsigned slot_groups; /* number of slot groups the product supports */
409
410 atomic_t drv_cleanup_done; /* Atomic variable for SRSI */
411
412 unsigned long index; /* Index to determine the disk name */
413
414 unsigned int ftlrebuildflag; /* FTL rebuild flag */
415
416 atomic_t resumeflag; /* Atomic variable to track suspend/resume */
417
418 atomic_t eh_active; /* Flag for error handling tracking */
419
420 struct task_struct *mtip_svc_handler; /* task_struct of svc thd */
421 };
422
423 #endif
424
drivers/block/xen-blkback/blkback.c
1 /****************************************************************************** 1 /******************************************************************************
2 * 2 *
3 * Back-end of the driver for virtual block devices. This portion of the 3 * Back-end of the driver for virtual block devices. This portion of the
4 * driver exports a 'unified' block-device interface that can be accessed 4 * driver exports a 'unified' block-device interface that can be accessed
5 * by any operating system that implements a compatible front end. A 5 * by any operating system that implements a compatible front end. A
6 * reference front-end implementation can be found in: 6 * reference front-end implementation can be found in:
7 * drivers/block/xen-blkfront.c 7 * drivers/block/xen-blkfront.c
8 * 8 *
9 * Copyright (c) 2003-2004, Keir Fraser & Steve Hand 9 * Copyright (c) 2003-2004, Keir Fraser & Steve Hand
10 * Copyright (c) 2005, Christopher Clark 10 * Copyright (c) 2005, Christopher Clark
11 * 11 *
12 * This program is free software; you can redistribute it and/or 12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License version 2 13 * modify it under the terms of the GNU General Public License version 2
14 * as published by the Free Software Foundation; or, when distributed 14 * as published by the Free Software Foundation; or, when distributed
15 * separately from the Linux kernel or incorporated into other 15 * separately from the Linux kernel or incorporated into other
16 * software packages, subject to the following license: 16 * software packages, subject to the following license:
17 * 17 *
18 * Permission is hereby granted, free of charge, to any person obtaining a copy 18 * Permission is hereby granted, free of charge, to any person obtaining a copy
19 * of this source file (the "Software"), to deal in the Software without 19 * of this source file (the "Software"), to deal in the Software without
20 * restriction, including without limitation the rights to use, copy, modify, 20 * restriction, including without limitation the rights to use, copy, modify,
21 * merge, publish, distribute, sublicense, and/or sell copies of the Software, 21 * merge, publish, distribute, sublicense, and/or sell copies of the Software,
22 * and to permit persons to whom the Software is furnished to do so, subject to 22 * and to permit persons to whom the Software is furnished to do so, subject to
23 * the following conditions: 23 * the following conditions:
24 * 24 *
25 * The above copyright notice and this permission notice shall be included in 25 * The above copyright notice and this permission notice shall be included in
26 * all copies or substantial portions of the Software. 26 * all copies or substantial portions of the Software.
27 * 27 *
28 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 28 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
29 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 29 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
30 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 30 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
31 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 31 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
32 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 32 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
33 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 33 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
34 * IN THE SOFTWARE. 34 * IN THE SOFTWARE.
35 */ 35 */
36 36
37 #include <linux/spinlock.h> 37 #include <linux/spinlock.h>
38 #include <linux/kthread.h> 38 #include <linux/kthread.h>
39 #include <linux/list.h> 39 #include <linux/list.h>
40 #include <linux/delay.h> 40 #include <linux/delay.h>
41 #include <linux/freezer.h> 41 #include <linux/freezer.h>
42 #include <linux/loop.h>
43 #include <linux/falloc.h>
44 #include <linux/fs.h>
45 42
46 #include <xen/events.h> 43 #include <xen/events.h>
47 #include <xen/page.h> 44 #include <xen/page.h>
48 #include <asm/xen/hypervisor.h> 45 #include <asm/xen/hypervisor.h>
49 #include <asm/xen/hypercall.h> 46 #include <asm/xen/hypercall.h>
50 #include "common.h" 47 #include "common.h"
51 48
52 /* 49 /*
53 * These are rather arbitrary. They are fairly large because adjacent requests 50 * These are rather arbitrary. They are fairly large because adjacent requests
54 * pulled from a communication ring are quite likely to end up being part of 51 * pulled from a communication ring are quite likely to end up being part of
55 * the same scatter/gather request at the disc. 52 * the same scatter/gather request at the disc.
56 * 53 *
57 * ** TRY INCREASING 'xen_blkif_reqs' IF WRITE SPEEDS SEEM TOO LOW ** 54 * ** TRY INCREASING 'xen_blkif_reqs' IF WRITE SPEEDS SEEM TOO LOW **
58 * 55 *
59 * This will increase the chances of being able to write whole tracks. 56 * This will increase the chances of being able to write whole tracks.
60 * 64 should be enough to keep us competitive with Linux. 57 * 64 should be enough to keep us competitive with Linux.
61 */ 58 */
62 static int xen_blkif_reqs = 64; 59 static int xen_blkif_reqs = 64;
63 module_param_named(reqs, xen_blkif_reqs, int, 0); 60 module_param_named(reqs, xen_blkif_reqs, int, 0);
64 MODULE_PARM_DESC(reqs, "Number of blkback requests to allocate"); 61 MODULE_PARM_DESC(reqs, "Number of blkback requests to allocate");
65 62
66 /* Run-time switchable: /sys/module/blkback/parameters/ */ 63 /* Run-time switchable: /sys/module/blkback/parameters/ */
67 static unsigned int log_stats; 64 static unsigned int log_stats;
68 module_param(log_stats, int, 0644); 65 module_param(log_stats, int, 0644);
69 66
70 /* 67 /*
71 * Each outstanding request that we've passed to the lower device layers has a 68 * Each outstanding request that we've passed to the lower device layers has a
72 * 'pending_req' allocated to it. Each buffer_head that completes decrements 69 * 'pending_req' allocated to it. Each buffer_head that completes decrements
73 * the pendcnt towards zero. When it hits zero, the specified domain has a 70 * the pendcnt towards zero. When it hits zero, the specified domain has a
74 * response queued for it, with the saved 'id' passed back. 71 * response queued for it, with the saved 'id' passed back.
75 */ 72 */
76 struct pending_req { 73 struct pending_req {
77 struct xen_blkif *blkif; 74 struct xen_blkif *blkif;
78 u64 id; 75 u64 id;
79 int nr_pages; 76 int nr_pages;
80 atomic_t pendcnt; 77 atomic_t pendcnt;
81 unsigned short operation; 78 unsigned short operation;
82 int status; 79 int status;
83 struct list_head free_list; 80 struct list_head free_list;
84 }; 81 };
85 82
86 #define BLKBACK_INVALID_HANDLE (~0) 83 #define BLKBACK_INVALID_HANDLE (~0)
87 84
88 struct xen_blkbk { 85 struct xen_blkbk {
89 struct pending_req *pending_reqs; 86 struct pending_req *pending_reqs;
90 /* List of all 'pending_req' available */ 87 /* List of all 'pending_req' available */
91 struct list_head pending_free; 88 struct list_head pending_free;
92 /* And its spinlock. */ 89 /* And its spinlock. */
93 spinlock_t pending_free_lock; 90 spinlock_t pending_free_lock;
94 wait_queue_head_t pending_free_wq; 91 wait_queue_head_t pending_free_wq;
95 /* The list of all pages that are available. */ 92 /* The list of all pages that are available. */
96 struct page **pending_pages; 93 struct page **pending_pages;
97 /* And the grant handles that are available. */ 94 /* And the grant handles that are available. */
98 grant_handle_t *pending_grant_handles; 95 grant_handle_t *pending_grant_handles;
99 }; 96 };
100 97
101 static struct xen_blkbk *blkbk; 98 static struct xen_blkbk *blkbk;
102 99
103 /* 100 /*
104 * Little helpful macro to figure out the index and virtual address of the 101 * Little helpful macro to figure out the index and virtual address of the
105 * pending_pages[..]. For each 'pending_req' we have up to 102
106 * BLKIF_MAX_SEGMENTS_PER_REQUEST (11) pages. The seg would be from 0 through 103 * BLKIF_MAX_SEGMENTS_PER_REQUEST (11) pages. The seg would be from 0 through
107 * 10 and would index in the pending_pages[..]. 104 * 10 and would index in the pending_pages[..].
108 */ 105 */
109 static inline int vaddr_pagenr(struct pending_req *req, int seg) 106 static inline int vaddr_pagenr(struct pending_req *req, int seg)
110 { 107 {
111 return (req - blkbk->pending_reqs) * 108 return (req - blkbk->pending_reqs) *
112 BLKIF_MAX_SEGMENTS_PER_REQUEST + seg; 109 BLKIF_MAX_SEGMENTS_PER_REQUEST + seg;
113 } 110 }
114 111
115 #define pending_page(req, seg) pending_pages[vaddr_pagenr(req, seg)] 112 #define pending_page(req, seg) pending_pages[vaddr_pagenr(req, seg)]
116 113
117 static inline unsigned long vaddr(struct pending_req *req, int seg) 114 static inline unsigned long vaddr(struct pending_req *req, int seg)
118 { 115 {
119 unsigned long pfn = page_to_pfn(blkbk->pending_page(req, seg)); 116 unsigned long pfn = page_to_pfn(blkbk->pending_page(req, seg));
120 return (unsigned long)pfn_to_kaddr(pfn); 117 return (unsigned long)pfn_to_kaddr(pfn);
121 } 118 }
122 119
123 #define pending_handle(_req, _seg) \ 120 #define pending_handle(_req, _seg) \
124 (blkbk->pending_grant_handles[vaddr_pagenr(_req, _seg)]) 121 (blkbk->pending_grant_handles[vaddr_pagenr(_req, _seg)])
125 122
126 123
127 static int do_block_io_op(struct xen_blkif *blkif); 124 static int do_block_io_op(struct xen_blkif *blkif);
128 static int dispatch_rw_block_io(struct xen_blkif *blkif, 125 static int dispatch_rw_block_io(struct xen_blkif *blkif,
129 struct blkif_request *req, 126 struct blkif_request *req,
130 struct pending_req *pending_req); 127 struct pending_req *pending_req);
131 static void make_response(struct xen_blkif *blkif, u64 id, 128 static void make_response(struct xen_blkif *blkif, u64 id,
132 unsigned short op, int st); 129 unsigned short op, int st);
133 130
134 /* 131 /*
135 * Retrieve from the 'pending_reqs' a free pending_req structure to be used. 132 * Retrieve from the 'pending_reqs' a free pending_req structure to be used.
136 */ 133 */
137 static struct pending_req *alloc_req(void) 134 static struct pending_req *alloc_req(void)
138 { 135 {
139 struct pending_req *req = NULL; 136 struct pending_req *req = NULL;
140 unsigned long flags; 137 unsigned long flags;
141 138
142 spin_lock_irqsave(&blkbk->pending_free_lock, flags); 139 spin_lock_irqsave(&blkbk->pending_free_lock, flags);
143 if (!list_empty(&blkbk->pending_free)) { 140 if (!list_empty(&blkbk->pending_free)) {
144 req = list_entry(blkbk->pending_free.next, struct pending_req, 141 req = list_entry(blkbk->pending_free.next, struct pending_req,
145 free_list); 142 free_list);
146 list_del(&req->free_list); 143 list_del(&req->free_list);
147 } 144 }
148 spin_unlock_irqrestore(&blkbk->pending_free_lock, flags); 145 spin_unlock_irqrestore(&blkbk->pending_free_lock, flags);
149 return req; 146 return req;
150 } 147 }
151 148
152 /* 149 /*
153 * Return the 'pending_req' structure back to the freepool. We also 150 * Return the 'pending_req' structure back to the freepool. We also
154 * wake up the thread if it was waiting for a free page. 151 * wake up the thread if it was waiting for a free page.
155 */ 152 */
156 static void free_req(struct pending_req *req) 153 static void free_req(struct pending_req *req)
157 { 154 {
158 unsigned long flags; 155 unsigned long flags;
159 int was_empty; 156 int was_empty;
160 157
161 spin_lock_irqsave(&blkbk->pending_free_lock, flags); 158 spin_lock_irqsave(&blkbk->pending_free_lock, flags);
162 was_empty = list_empty(&blkbk->pending_free); 159 was_empty = list_empty(&blkbk->pending_free);
163 list_add(&req->free_list, &blkbk->pending_free); 160 list_add(&req->free_list, &blkbk->pending_free);
164 spin_unlock_irqrestore(&blkbk->pending_free_lock, flags); 161 spin_unlock_irqrestore(&blkbk->pending_free_lock, flags);
165 if (was_empty) 162 if (was_empty)
166 wake_up(&blkbk->pending_free_wq); 163 wake_up(&blkbk->pending_free_wq);
167 } 164 }
168 165
169 /* 166 /*
170 * Routines for managing virtual block devices (vbds). 167 * Routines for managing virtual block devices (vbds).
171 */ 168 */
172 static int xen_vbd_translate(struct phys_req *req, struct xen_blkif *blkif, 169 static int xen_vbd_translate(struct phys_req *req, struct xen_blkif *blkif,
173 int operation) 170 int operation)
174 { 171 {
175 struct xen_vbd *vbd = &blkif->vbd; 172 struct xen_vbd *vbd = &blkif->vbd;
176 int rc = -EACCES; 173 int rc = -EACCES;
177 174
178 if ((operation != READ) && vbd->readonly) 175 if ((operation != READ) && vbd->readonly)
179 goto out; 176 goto out;
180 177
181 if (likely(req->nr_sects)) { 178 if (likely(req->nr_sects)) {
182 blkif_sector_t end = req->sector_number + req->nr_sects; 179 blkif_sector_t end = req->sector_number + req->nr_sects;
183 180
184 if (unlikely(end < req->sector_number)) 181 if (unlikely(end < req->sector_number))
185 goto out; 182 goto out;
186 if (unlikely(end > vbd_sz(vbd))) 183 if (unlikely(end > vbd_sz(vbd)))
187 goto out; 184 goto out;
188 } 185 }
189 186
190 req->dev = vbd->pdevice; 187 req->dev = vbd->pdevice;
191 req->bdev = vbd->bdev; 188 req->bdev = vbd->bdev;
192 rc = 0; 189 rc = 0;
193 190
194 out: 191 out:
195 return rc; 192 return rc;
196 } 193 }
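
The checks above reject anything but a read on a read-only vbd, a transfer that would run past the end of the device, and a 64-bit wraparound of sector_number + nr_sects. The wraparound test in isolation, as a self-contained sketch (range_ok and the plain uint64_t types are illustrative):

#include <stdint.h>
#include <stdbool.h>

/* Accept [start, start + count) only if it fits inside a device of
 * 'size' sectors; end < start catches unsigned overflow, exactly as
 * xen_vbd_translate() does. */
static bool range_ok(uint64_t start, uint64_t count, uint64_t size)
{
	uint64_t end = start + count;

	if (count == 0)
		return true;	/* nothing to transfer, nothing to check */
	if (end < start)
		return false;	/* sector arithmetic wrapped around */
	return end <= size;
}
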
197 194
198 static void xen_vbd_resize(struct xen_blkif *blkif) 195 static void xen_vbd_resize(struct xen_blkif *blkif)
199 { 196 {
200 struct xen_vbd *vbd = &blkif->vbd; 197 struct xen_vbd *vbd = &blkif->vbd;
201 struct xenbus_transaction xbt; 198 struct xenbus_transaction xbt;
202 int err; 199 int err;
203 struct xenbus_device *dev = xen_blkbk_xenbus(blkif->be); 200 struct xenbus_device *dev = xen_blkbk_xenbus(blkif->be);
204 unsigned long long new_size = vbd_sz(vbd); 201 unsigned long long new_size = vbd_sz(vbd);
205 202
206 pr_info(DRV_PFX "VBD Resize: Domid: %d, Device: (%d, %d)\n", 203 pr_info(DRV_PFX "VBD Resize: Domid: %d, Device: (%d, %d)\n",
207 blkif->domid, MAJOR(vbd->pdevice), MINOR(vbd->pdevice)); 204 blkif->domid, MAJOR(vbd->pdevice), MINOR(vbd->pdevice));
208 pr_info(DRV_PFX "VBD Resize: new size %llu\n", new_size); 205 pr_info(DRV_PFX "VBD Resize: new size %llu\n", new_size);
209 vbd->size = new_size; 206 vbd->size = new_size;
210 again: 207 again:
211 err = xenbus_transaction_start(&xbt); 208 err = xenbus_transaction_start(&xbt);
212 if (err) { 209 if (err) {
213 pr_warn(DRV_PFX "Error starting transaction"); 210 pr_warn(DRV_PFX "Error starting transaction");
214 return; 211 return;
215 } 212 }
216 err = xenbus_printf(xbt, dev->nodename, "sectors", "%llu", 213 err = xenbus_printf(xbt, dev->nodename, "sectors", "%llu",
217 (unsigned long long)vbd_sz(vbd)); 214 (unsigned long long)vbd_sz(vbd));
218 if (err) { 215 if (err) {
219 pr_warn(DRV_PFX "Error writing new size"); 216 pr_warn(DRV_PFX "Error writing new size");
220 goto abort; 217 goto abort;
221 } 218 }
222 /* 219 /*
223 * Write the current state; we will use this to synchronize 220 * Write the current state; we will use this to synchronize
224 * the front-end. If the current state is "connected" the 221 * the front-end. If the current state is "connected" the
225 * front-end will get the new size information online. 222 * front-end will get the new size information online.
226 */ 223 */
227 err = xenbus_printf(xbt, dev->nodename, "state", "%d", dev->state); 224 err = xenbus_printf(xbt, dev->nodename, "state", "%d", dev->state);
228 if (err) { 225 if (err) {
229 pr_warn(DRV_PFX "Error writing the state"); 226 pr_warn(DRV_PFX "Error writing the state");
230 goto abort; 227 goto abort;
231 } 228 }
232 229
233 err = xenbus_transaction_end(xbt, 0); 230 err = xenbus_transaction_end(xbt, 0);
234 if (err == -EAGAIN) 231 if (err == -EAGAIN)
235 goto again; 232 goto again;
236 if (err) 233 if (err)
237 pr_warn(DRV_PFX "Error ending transaction"); 234 pr_warn(DRV_PFX "Error ending transaction");
238 return; 235 return;
239 abort: 236 abort:
240 xenbus_transaction_end(xbt, 1); 237 xenbus_transaction_end(xbt, 1);
241 } 238 }
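
xen_vbd_resize() publishes the new "sectors" value and re-writes "state" inside one xenbus transaction, retrying the whole sequence when the commit returns -EAGAIN because another writer raced with it. A self-contained model of that retry loop, with txn_begin/txn_write/txn_commit as invented stand-ins for the xenbus calls:

#include <errno.h>
#include <stdio.h>

/* Invented stand-ins for xenbus_transaction_start/printf/end; a real
 * backend would talk to xenstore here. */
static int txn_begin(void)                      { return 0; }
static int txn_write(const char *key, long val) { printf("%s=%ld\n", key, val); return 0; }
static int txn_commit(int abort)                { (void)abort; return 0; /* or -EAGAIN */ }

static int publish_size(long sectors, int state)
{
	int err;

again:
	err = txn_begin();
	if (err)
		return err;

	if ((err = txn_write("sectors", sectors)) ||
	    (err = txn_write("state", state))) {
		txn_commit(1);		/* abort: keep the old values */
		return err;
	}

	err = txn_commit(0);
	if (err == -EAGAIN)
		goto again;		/* lost the race, redo every write */
	return err;
}

int main(void)
{
	return publish_size(2048, 4);	/* example values only */
}
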
242 239
243 /* 240 /*
244 * Notification from the guest OS. 241 * Notification from the guest OS.
245 */ 242 */
246 static void blkif_notify_work(struct xen_blkif *blkif) 243 static void blkif_notify_work(struct xen_blkif *blkif)
247 { 244 {
248 blkif->waiting_reqs = 1; 245 blkif->waiting_reqs = 1;
249 wake_up(&blkif->wq); 246 wake_up(&blkif->wq);
250 } 247 }
251 248
252 irqreturn_t xen_blkif_be_int(int irq, void *dev_id) 249 irqreturn_t xen_blkif_be_int(int irq, void *dev_id)
253 { 250 {
254 blkif_notify_work(dev_id); 251 blkif_notify_work(dev_id);
255 return IRQ_HANDLED; 252 return IRQ_HANDLED;
256 } 253 }
257 254
258 /* 255 /*
259 * SCHEDULER FUNCTIONS 256 * SCHEDULER FUNCTIONS
260 */ 257 */
261 258
262 static void print_stats(struct xen_blkif *blkif) 259 static void print_stats(struct xen_blkif *blkif)
263 { 260 {
264 pr_info("xen-blkback (%s): oo %3d | rd %4d | wr %4d | f %4d" 261 pr_info("xen-blkback (%s): oo %3d | rd %4d | wr %4d | f %4d"
265 " | ds %4d\n", 262 " | ds %4d\n",
266 current->comm, blkif->st_oo_req, 263 current->comm, blkif->st_oo_req,
267 blkif->st_rd_req, blkif->st_wr_req, 264 blkif->st_rd_req, blkif->st_wr_req,
268 blkif->st_f_req, blkif->st_ds_req); 265 blkif->st_f_req, blkif->st_ds_req);
269 blkif->st_print = jiffies + msecs_to_jiffies(10 * 1000); 266 blkif->st_print = jiffies + msecs_to_jiffies(10 * 1000);
270 blkif->st_rd_req = 0; 267 blkif->st_rd_req = 0;
271 blkif->st_wr_req = 0; 268 blkif->st_wr_req = 0;
272 blkif->st_oo_req = 0; 269 blkif->st_oo_req = 0;
273 blkif->st_ds_req = 0; 270 blkif->st_ds_req = 0;
274 } 271 }
275 272
276 int xen_blkif_schedule(void *arg) 273 int xen_blkif_schedule(void *arg)
277 { 274 {
278 struct xen_blkif *blkif = arg; 275 struct xen_blkif *blkif = arg;
279 struct xen_vbd *vbd = &blkif->vbd; 276 struct xen_vbd *vbd = &blkif->vbd;
280 277
281 xen_blkif_get(blkif); 278 xen_blkif_get(blkif);
282 279
283 while (!kthread_should_stop()) { 280 while (!kthread_should_stop()) {
284 if (try_to_freeze()) 281 if (try_to_freeze())
285 continue; 282 continue;
286 if (unlikely(vbd->size != vbd_sz(vbd))) 283 if (unlikely(vbd->size != vbd_sz(vbd)))
287 xen_vbd_resize(blkif); 284 xen_vbd_resize(blkif);
288 285
289 wait_event_interruptible( 286 wait_event_interruptible(
290 blkif->wq, 287 blkif->wq,
291 blkif->waiting_reqs || kthread_should_stop()); 288 blkif->waiting_reqs || kthread_should_stop());
292 wait_event_interruptible( 289 wait_event_interruptible(
293 blkbk->pending_free_wq, 290 blkbk->pending_free_wq,
294 !list_empty(&blkbk->pending_free) || 291 !list_empty(&blkbk->pending_free) ||
295 kthread_should_stop()); 292 kthread_should_stop());
296 293
297 blkif->waiting_reqs = 0; 294 blkif->waiting_reqs = 0;
298 smp_mb(); /* clear flag *before* checking for work */ 295 smp_mb(); /* clear flag *before* checking for work */
299 296
300 if (do_block_io_op(blkif)) 297 if (do_block_io_op(blkif))
301 blkif->waiting_reqs = 1; 298 blkif->waiting_reqs = 1;
302 299
303 if (log_stats && time_after(jiffies, blkif->st_print)) 300 if (log_stats && time_after(jiffies, blkif->st_print))
304 print_stats(blkif); 301 print_stats(blkif);
305 } 302 }
306 303
307 if (log_stats) 304 if (log_stats)
308 print_stats(blkif); 305 print_stats(blkif);
309 306
310 blkif->xenblkd = NULL; 307 blkif->xenblkd = NULL;
311 xen_blkif_put(blkif); 308 xen_blkif_put(blkif);
312 309
313 return 0; 310 return 0;
314 } 311 }
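
The scheduler thread clears waiting_reqs before it drains the ring and sets it again if __do_block_io_op() reports leftover work, so a notification that arrives while the ring is being drained is never lost. The same clear-then-recheck consumer shape in portable C11 atomics (all names are illustrative):

#include <stdatomic.h>
#include <stdbool.h>

static atomic_bool waiting;	/* set by the notifier, like waiting_reqs */

/* Producer side: the equivalent of blkif_notify_work(). */
static void notify(void)
{
	atomic_store(&waiting, true);	/* a real worker would also be woken here */
}

/* One pass of the worker: clear the flag *before* looking for work, so a
 * notification racing with the drain leaves the flag set for the next pass. */
static void worker_pass(bool (*drain_ring)(void))
{
	atomic_store(&waiting, false);	/* like waiting_reqs = 0; smp_mb() */
	if (drain_ring())		/* more requests than one pass consumed? */
		atomic_store(&waiting, true);
}
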
315 312
316 struct seg_buf { 313 struct seg_buf {
317 unsigned long buf; 314 unsigned long buf;
318 unsigned int nsec; 315 unsigned int nsec;
319 }; 316 };
320 /* 317 /*
321 * Unmap the grant references, and also remove the M2P over-rides 318 * Unmap the grant references, and also remove the M2P over-rides
322 * used in the 'pending_req'. 319 * used in the 'pending_req'.
323 */ 320 */
324 static void xen_blkbk_unmap(struct pending_req *req) 321 static void xen_blkbk_unmap(struct pending_req *req)
325 { 322 {
326 struct gnttab_unmap_grant_ref unmap[BLKIF_MAX_SEGMENTS_PER_REQUEST]; 323 struct gnttab_unmap_grant_ref unmap[BLKIF_MAX_SEGMENTS_PER_REQUEST];
327 unsigned int i, invcount = 0; 324 unsigned int i, invcount = 0;
328 grant_handle_t handle; 325 grant_handle_t handle;
329 int ret; 326 int ret;
330 327
331 for (i = 0; i < req->nr_pages; i++) { 328 for (i = 0; i < req->nr_pages; i++) {
332 handle = pending_handle(req, i); 329 handle = pending_handle(req, i);
333 if (handle == BLKBACK_INVALID_HANDLE) 330 if (handle == BLKBACK_INVALID_HANDLE)
334 continue; 331 continue;
335 gnttab_set_unmap_op(&unmap[invcount], vaddr(req, i), 332 gnttab_set_unmap_op(&unmap[invcount], vaddr(req, i),
336 GNTMAP_host_map, handle); 333 GNTMAP_host_map, handle);
337 pending_handle(req, i) = BLKBACK_INVALID_HANDLE; 334 pending_handle(req, i) = BLKBACK_INVALID_HANDLE;
338 invcount++; 335 invcount++;
339 } 336 }
340 337
341 ret = HYPERVISOR_grant_table_op( 338 ret = HYPERVISOR_grant_table_op(
342 GNTTABOP_unmap_grant_ref, unmap, invcount); 339 GNTTABOP_unmap_grant_ref, unmap, invcount);
343 BUG_ON(ret); 340 BUG_ON(ret);
344 /* 341 /*
345 * Note, we use invcount, not nr_pages, so we can't index 342 * Note, we use invcount, not nr_pages, so we can't index
346 * using vaddr(req, i). 343 * using vaddr(req, i).
347 */ 344 */
348 for (i = 0; i < invcount; i++) { 345 for (i = 0; i < invcount; i++) {
349 ret = m2p_remove_override( 346 ret = m2p_remove_override(
350 virt_to_page(unmap[i].host_addr), false); 347 virt_to_page(unmap[i].host_addr), false);
351 if (ret) { 348 if (ret) {
352 pr_alert(DRV_PFX "Failed to remove M2P override for %lx\n", 349 pr_alert(DRV_PFX "Failed to remove M2P override for %lx\n",
353 (unsigned long)unmap[i].host_addr); 350 (unsigned long)unmap[i].host_addr);
354 continue; 351 continue;
355 } 352 }
356 } 353 }
357 } 354 }
358 355
359 static int xen_blkbk_map(struct blkif_request *req, 356 static int xen_blkbk_map(struct blkif_request *req,
360 struct pending_req *pending_req, 357 struct pending_req *pending_req,
361 struct seg_buf seg[]) 358 struct seg_buf seg[])
362 { 359 {
363 struct gnttab_map_grant_ref map[BLKIF_MAX_SEGMENTS_PER_REQUEST]; 360 struct gnttab_map_grant_ref map[BLKIF_MAX_SEGMENTS_PER_REQUEST];
364 int i; 361 int i;
365 int nseg = req->nr_segments; 362 int nseg = req->u.rw.nr_segments;
366 int ret = 0; 363 int ret = 0;
367 364
368 /* 365 /*
369 * Fill out preq.nr_sects with the proper number of sectors, and 366 * Fill out preq.nr_sects with the proper number of sectors, and
370 * assign map[..] with the PFN of the page in our domain with the 367 * assign map[..] with the PFN of the page in our domain with the
371 * corresponding grant reference for each page. 368 * corresponding grant reference for each page.
372 */ 369 */
373 for (i = 0; i < nseg; i++) { 370 for (i = 0; i < nseg; i++) {
374 uint32_t flags; 371 uint32_t flags;
375 372
376 flags = GNTMAP_host_map; 373 flags = GNTMAP_host_map;
377 if (pending_req->operation != BLKIF_OP_READ) 374 if (pending_req->operation != BLKIF_OP_READ)
378 flags |= GNTMAP_readonly; 375 flags |= GNTMAP_readonly;
379 gnttab_set_map_op(&map[i], vaddr(pending_req, i), flags, 376 gnttab_set_map_op(&map[i], vaddr(pending_req, i), flags,
380 req->u.rw.seg[i].gref, 377 req->u.rw.seg[i].gref,
381 pending_req->blkif->domid); 378 pending_req->blkif->domid);
382 } 379 }
383 380
384 ret = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, map, nseg); 381 ret = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, map, nseg);
385 BUG_ON(ret); 382 BUG_ON(ret);
386 383
387 /* 384 /*
388 * Now swizzle the MFN in our domain with the MFN from the other domain 385 * Now swizzle the MFN in our domain with the MFN from the other domain
389 * so that when we access vaddr(pending_req,i) it has the contents of 386 * so that when we access vaddr(pending_req,i) it has the contents of
390 * the page from the other domain. 387 * the page from the other domain.
391 */ 388 */
392 for (i = 0; i < nseg; i++) { 389 for (i = 0; i < nseg; i++) {
393 if (unlikely(map[i].status != 0)) { 390 if (unlikely(map[i].status != 0)) {
394 pr_debug(DRV_PFX "invalid buffer -- could not remap it\n"); 391 pr_debug(DRV_PFX "invalid buffer -- could not remap it\n");
395 map[i].handle = BLKBACK_INVALID_HANDLE; 392 map[i].handle = BLKBACK_INVALID_HANDLE;
396 ret |= 1; 393 ret |= 1;
397 } 394 }
398 395
399 pending_handle(pending_req, i) = map[i].handle; 396 pending_handle(pending_req, i) = map[i].handle;
400 397
401 if (ret) 398 if (ret)
402 continue; 399 continue;
403 400
404 ret = m2p_add_override(PFN_DOWN(map[i].dev_bus_addr), 401 ret = m2p_add_override(PFN_DOWN(map[i].dev_bus_addr),
405 blkbk->pending_page(pending_req, i), NULL); 402 blkbk->pending_page(pending_req, i), NULL);
406 if (ret) { 403 if (ret) {
407 pr_alert(DRV_PFX "Failed to install M2P override for %lx (ret: %d)\n", 404 pr_alert(DRV_PFX "Failed to install M2P override for %lx (ret: %d)\n",
408 (unsigned long)map[i].dev_bus_addr, ret); 405 (unsigned long)map[i].dev_bus_addr, ret);
409 /* We could switch over to GNTTABOP_copy */ 406 /* We could switch over to GNTTABOP_copy */
410 continue; 407 continue;
411 } 408 }
412 409
413 seg[i].buf = map[i].dev_bus_addr | 410 seg[i].buf = map[i].dev_bus_addr |
414 (req->u.rw.seg[i].first_sect << 9); 411 (req->u.rw.seg[i].first_sect << 9);
415 } 412 }
416 return ret; 413 return ret;
417 } 414 }
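
For every mapped segment the backend records a buffer address built from the machine bus address of the granted page plus the byte offset of the segment's first sector; blkif sectors are 512 bytes, hence the << 9. The arithmetic on its own, with made-up example values:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t dev_bus_addr = 0x12345000;	/* page-aligned bus address (example) */
	uint8_t  first_sect   = 3;		/* segment starts 3 sectors into the page */

	/* 512-byte sectors: shifting by 9 turns a sector index into a byte offset. */
	uint64_t buf = dev_bus_addr | ((uint64_t)first_sect << 9);

	printf("segment buffer address: 0x%llx\n", (unsigned long long)buf);
	return 0;
}
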
418 415
419 static void xen_blk_discard(struct xen_blkif *blkif, struct blkif_request *req) 416 static int dispatch_discard_io(struct xen_blkif *blkif,
417 struct blkif_request *req)
420 { 418 {
421 int err = 0; 419 int err = 0;
422 int status = BLKIF_RSP_OKAY; 420 int status = BLKIF_RSP_OKAY;
423 struct block_device *bdev = blkif->vbd.bdev; 421 struct block_device *bdev = blkif->vbd.bdev;
424 422
425 if (blkif->blk_backend_type == BLKIF_BACKEND_PHY) 423 blkif->st_ds_req++;
426 /* just forward the discard request */ 424
425 xen_blkif_get(blkif);
426 if (blkif->blk_backend_type == BLKIF_BACKEND_PHY ||
427 blkif->blk_backend_type == BLKIF_BACKEND_FILE) {
428 unsigned long secure = (blkif->vbd.discard_secure &&
429 (req->u.discard.flag & BLKIF_DISCARD_SECURE)) ?
430 BLKDEV_DISCARD_SECURE : 0;
427 err = blkdev_issue_discard(bdev, 431 err = blkdev_issue_discard(bdev,
428 req->u.discard.sector_number, 432 req->u.discard.sector_number,
429 req->u.discard.nr_sectors, 433 req->u.discard.nr_sectors,
430 GFP_KERNEL, 0); 434 GFP_KERNEL, secure);
431 else if (blkif->blk_backend_type == BLKIF_BACKEND_FILE) {
432 /* punch a hole in the backing file */
433 struct loop_device *lo = bdev->bd_disk->private_data;
434 struct file *file = lo->lo_backing_file;
435
436 if (file->f_op->fallocate)
437 err = file->f_op->fallocate(file,
438 FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE,
439 req->u.discard.sector_number << 9,
440 req->u.discard.nr_sectors << 9);
441 else
442 err = -EOPNOTSUPP;
443 } else 435 } else
444 err = -EOPNOTSUPP; 436 err = -EOPNOTSUPP;
445 437
446 if (err == -EOPNOTSUPP) { 438 if (err == -EOPNOTSUPP) {
447 pr_debug(DRV_PFX "discard op failed, not supported\n"); 439 pr_debug(DRV_PFX "discard op failed, not supported\n");
448 status = BLKIF_RSP_EOPNOTSUPP; 440 status = BLKIF_RSP_EOPNOTSUPP;
449 } else if (err) 441 } else if (err)
450 status = BLKIF_RSP_ERROR; 442 status = BLKIF_RSP_ERROR;
451 443
452 make_response(blkif, req->id, req->operation, status); 444 make_response(blkif, req->u.discard.id, req->operation, status);
445 xen_blkif_put(blkif);
446 return err;
453 } 447 }
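
dispatch_discard_io() only asks blkdev_issue_discard() for a secure erase when the vbd advertised discard_secure support and the frontend set BLKIF_DISCARD_SECURE in the request; otherwise it falls back to a plain discard. The flag derivation in isolation (the two constants below are illustrative placeholders for the real blkif and blkdev flag values):

#include <stdbool.h>

#define REQ_DISCARD_SECURE	(1u << 0)	/* placeholder for BLKIF_DISCARD_SECURE */
#define ISSUE_DISCARD_SECURE	(1u << 0)	/* placeholder for BLKDEV_DISCARD_SECURE */

static unsigned long discard_flags(bool backend_secure, unsigned int req_flag)
{
	/* Secure erase only when both sides agree; a plain discard otherwise. */
	return (backend_secure && (req_flag & REQ_DISCARD_SECURE)) ?
		ISSUE_DISCARD_SECURE : 0;
}
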
454 448
455 static void xen_blk_drain_io(struct xen_blkif *blkif) 449 static void xen_blk_drain_io(struct xen_blkif *blkif)
456 { 450 {
457 atomic_set(&blkif->drain, 1); 451 atomic_set(&blkif->drain, 1);
458 do { 452 do {
459 /* The initial value is one, and one refcnt taken at the 453 /* The initial value is one, and one refcnt taken at the
460 * start of the xen_blkif_schedule thread. */ 454 * start of the xen_blkif_schedule thread. */
461 if (atomic_read(&blkif->refcnt) <= 2) 455 if (atomic_read(&blkif->refcnt) <= 2)
462 break; 456 break;
463 wait_for_completion_interruptible_timeout( 457 wait_for_completion_interruptible_timeout(
464 &blkif->drain_complete, HZ); 458 &blkif->drain_complete, HZ);
465 459
466 if (!atomic_read(&blkif->drain)) 460 if (!atomic_read(&blkif->drain))
467 break; 461 break;
468 } while (!kthread_should_stop()); 462 } while (!kthread_should_stop());
469 atomic_set(&blkif->drain, 0); 463 atomic_set(&blkif->drain, 0);
470 } 464 }
471 465
472 /* 466 /*
473 * Completion callback on the bios. Called as bio->bi_end_io() 467 * Completion callback on the bios. Called as bio->bi_end_io()
474 */ 468 */
475 469
476 static void __end_block_io_op(struct pending_req *pending_req, int error) 470 static void __end_block_io_op(struct pending_req *pending_req, int error)
477 { 471 {
478 /* An error fails the entire request. */ 472 /* An error fails the entire request. */
479 if ((pending_req->operation == BLKIF_OP_FLUSH_DISKCACHE) && 473 if ((pending_req->operation == BLKIF_OP_FLUSH_DISKCACHE) &&
480 (error == -EOPNOTSUPP)) { 474 (error == -EOPNOTSUPP)) {
481 pr_debug(DRV_PFX "flush diskcache op failed, not supported\n"); 475 pr_debug(DRV_PFX "flush diskcache op failed, not supported\n");
482 xen_blkbk_flush_diskcache(XBT_NIL, pending_req->blkif->be, 0); 476 xen_blkbk_flush_diskcache(XBT_NIL, pending_req->blkif->be, 0);
483 pending_req->status = BLKIF_RSP_EOPNOTSUPP; 477 pending_req->status = BLKIF_RSP_EOPNOTSUPP;
484 } else if ((pending_req->operation == BLKIF_OP_WRITE_BARRIER) && 478 } else if ((pending_req->operation == BLKIF_OP_WRITE_BARRIER) &&
485 (error == -EOPNOTSUPP)) { 479 (error == -EOPNOTSUPP)) {
486 pr_debug(DRV_PFX "write barrier op failed, not supported\n"); 480 pr_debug(DRV_PFX "write barrier op failed, not supported\n");
487 xen_blkbk_barrier(XBT_NIL, pending_req->blkif->be, 0); 481 xen_blkbk_barrier(XBT_NIL, pending_req->blkif->be, 0);
488 pending_req->status = BLKIF_RSP_EOPNOTSUPP; 482 pending_req->status = BLKIF_RSP_EOPNOTSUPP;
489 } else if (error) { 483 } else if (error) {
490 pr_debug(DRV_PFX "Buffer not up-to-date at end of operation," 484 pr_debug(DRV_PFX "Buffer not up-to-date at end of operation,"
491 " error=%d\n", error); 485 " error=%d\n", error);
492 pending_req->status = BLKIF_RSP_ERROR; 486 pending_req->status = BLKIF_RSP_ERROR;
493 } 487 }
494 488
495 /* 489 /*
496 * If all of the bio's have completed it is time to unmap 490 * If all of the bio's have completed it is time to unmap
497 * the grant references associated with 'request' and provide 491 * the grant references associated with 'request' and provide
498 * the proper response on the ring. 492 * the proper response on the ring.
499 */ 493 */
500 if (atomic_dec_and_test(&pending_req->pendcnt)) { 494 if (atomic_dec_and_test(&pending_req->pendcnt)) {
501 xen_blkbk_unmap(pending_req); 495 xen_blkbk_unmap(pending_req);
502 make_response(pending_req->blkif, pending_req->id, 496 make_response(pending_req->blkif, pending_req->id,
503 pending_req->operation, pending_req->status); 497 pending_req->operation, pending_req->status);
504 xen_blkif_put(pending_req->blkif); 498 xen_blkif_put(pending_req->blkif);
505 if (atomic_read(&pending_req->blkif->refcnt) <= 2) { 499 if (atomic_read(&pending_req->blkif->refcnt) <= 2) {
506 if (atomic_read(&pending_req->blkif->drain)) 500 if (atomic_read(&pending_req->blkif->drain))
507 complete(&pending_req->blkif->drain_complete); 501 complete(&pending_req->blkif->drain_complete);
508 } 502 }
509 free_req(pending_req); 503 free_req(pending_req);
510 } 504 }
511 } 505 }
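
A single request can fan out into several bios; pendcnt counts them, and only the completion that drops the count to zero unmaps the grants and puts the response on the ring. The last-one-out test, reduced to C11 atomics (names are illustrative):

#include <stdatomic.h>

struct pending {
	atomic_int pendcnt;	/* number of bios still in flight */
};

/* Call once per completed bio; returns 1 only for the final completion,
 * which is the sole caller allowed to unmap and respond. */
static int complete_one(struct pending *p)
{
	return atomic_fetch_sub(&p->pendcnt, 1) == 1;
}
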
512 506
513 /* 507 /*
514 * bio callback. 508 * bio callback.
515 */ 509 */
516 static void end_block_io_op(struct bio *bio, int error) 510 static void end_block_io_op(struct bio *bio, int error)
517 { 511 {
518 __end_block_io_op(bio->bi_private, error); 512 __end_block_io_op(bio->bi_private, error);
519 bio_put(bio); 513 bio_put(bio);
520 } 514 }
521 515
522 516
523 517
524 /* 518 /*
525 * Function to copy the 'struct blkif_request' from the ring buffer 519 * Function to copy the 'struct blkif_request' from the ring buffer
526 * (which has the sectors we want, number of them, grant references, etc), 520 * (which has the sectors we want, number of them, grant references, etc),
527 * and transmute it to the block API to hand it over to the proper block disk. 521 * and transmute it to the block API to hand it over to the proper block disk.
528 */ 522 */
529 static int 523 static int
530 __do_block_io_op(struct xen_blkif *blkif) 524 __do_block_io_op(struct xen_blkif *blkif)
531 { 525 {
532 union blkif_back_rings *blk_rings = &blkif->blk_rings; 526 union blkif_back_rings *blk_rings = &blkif->blk_rings;
533 struct blkif_request req; 527 struct blkif_request req;
534 struct pending_req *pending_req; 528 struct pending_req *pending_req;
535 RING_IDX rc, rp; 529 RING_IDX rc, rp;
536 int more_to_do = 0; 530 int more_to_do = 0;
537 531
538 rc = blk_rings->common.req_cons; 532 rc = blk_rings->common.req_cons;
539 rp = blk_rings->common.sring->req_prod; 533 rp = blk_rings->common.sring->req_prod;
540 rmb(); /* Ensure we see queued requests up to 'rp'. */ 534 rmb(); /* Ensure we see queued requests up to 'rp'. */
541 535
542 while (rc != rp) { 536 while (rc != rp) {
543 537
544 if (RING_REQUEST_CONS_OVERFLOW(&blk_rings->common, rc)) 538 if (RING_REQUEST_CONS_OVERFLOW(&blk_rings->common, rc))
545 break; 539 break;
546 540
547 if (kthread_should_stop()) { 541 if (kthread_should_stop()) {
548 more_to_do = 1; 542 more_to_do = 1;
549 break; 543 break;
550 } 544 }
551 545
552 pending_req = alloc_req(); 546 pending_req = alloc_req();
553 if (NULL == pending_req) { 547 if (NULL == pending_req) {
554 blkif->st_oo_req++; 548 blkif->st_oo_req++;
555 more_to_do = 1; 549 more_to_do = 1;
556 break; 550 break;
557 } 551 }
558 552
559 switch (blkif->blk_protocol) { 553 switch (blkif->blk_protocol) {
560 case BLKIF_PROTOCOL_NATIVE: 554 case BLKIF_PROTOCOL_NATIVE:
561 memcpy(&req, RING_GET_REQUEST(&blk_rings->native, rc), sizeof(req)); 555 memcpy(&req, RING_GET_REQUEST(&blk_rings->native, rc), sizeof(req));
562 break; 556 break;
563 case BLKIF_PROTOCOL_X86_32: 557 case BLKIF_PROTOCOL_X86_32:
564 blkif_get_x86_32_req(&req, RING_GET_REQUEST(&blk_rings->x86_32, rc)); 558 blkif_get_x86_32_req(&req, RING_GET_REQUEST(&blk_rings->x86_32, rc));
565 break; 559 break;
566 case BLKIF_PROTOCOL_X86_64: 560 case BLKIF_PROTOCOL_X86_64:
567 blkif_get_x86_64_req(&req, RING_GET_REQUEST(&blk_rings->x86_64, rc)); 561 blkif_get_x86_64_req(&req, RING_GET_REQUEST(&blk_rings->x86_64, rc));
568 break; 562 break;
569 default: 563 default:
570 BUG(); 564 BUG();
571 } 565 }
572 blk_rings->common.req_cons = ++rc; /* before make_response() */ 566 blk_rings->common.req_cons = ++rc; /* before make_response() */
573 567
574 /* Apply all sanity checks to /private copy/ of request. */ 568 /* Apply all sanity checks to /private copy/ of request. */
575 barrier(); 569 barrier();
576 570 if (unlikely(req.operation == BLKIF_OP_DISCARD)) {
577 if (dispatch_rw_block_io(blkif, &req, pending_req)) 571 free_req(pending_req);
572 if (dispatch_discard_io(blkif, &req))
573 break;
574 } else if (dispatch_rw_block_io(blkif, &req, pending_req))
578 break; 575 break;
579 576
580 /* Yield point for this unbounded loop. */ 577 /* Yield point for this unbounded loop. */
581 cond_resched(); 578 cond_resched();
582 } 579 }
583 580
584 return more_to_do; 581 return more_to_do;
585 } 582 }
586 583
587 static int 584 static int
588 do_block_io_op(struct xen_blkif *blkif) 585 do_block_io_op(struct xen_blkif *blkif)
589 { 586 {
590 union blkif_back_rings *blk_rings = &blkif->blk_rings; 587 union blkif_back_rings *blk_rings = &blkif->blk_rings;
591 int more_to_do; 588 int more_to_do;
592 589
593 do { 590 do {
594 more_to_do = __do_block_io_op(blkif); 591 more_to_do = __do_block_io_op(blkif);
595 if (more_to_do) 592 if (more_to_do)
596 break; 593 break;
597 594
598 RING_FINAL_CHECK_FOR_REQUESTS(&blk_rings->common, more_to_do); 595 RING_FINAL_CHECK_FOR_REQUESTS(&blk_rings->common, more_to_do);
599 } while (more_to_do); 596 } while (more_to_do);
600 597
601 return more_to_do; 598 return more_to_do;
602 } 599 }
603 /* 600 /*
604 * Transmutation of the 'struct blkif_request' to a proper 'struct bio' 601 * Transmutation of the 'struct blkif_request' to a proper 'struct bio'
605 * and call the 'submit_bio' to pass it to the underlying storage. 602 * and call the 'submit_bio' to pass it to the underlying storage.
606 */ 603 */
607 static int dispatch_rw_block_io(struct xen_blkif *blkif, 604 static int dispatch_rw_block_io(struct xen_blkif *blkif,
608 struct blkif_request *req, 605 struct blkif_request *req,
609 struct pending_req *pending_req) 606 struct pending_req *pending_req)
610 { 607 {
611 struct phys_req preq; 608 struct phys_req preq;
612 struct seg_buf seg[BLKIF_MAX_SEGMENTS_PER_REQUEST]; 609 struct seg_buf seg[BLKIF_MAX_SEGMENTS_PER_REQUEST];
613 unsigned int nseg; 610 unsigned int nseg;
614 struct bio *bio = NULL; 611 struct bio *bio = NULL;
615 struct bio *biolist[BLKIF_MAX_SEGMENTS_PER_REQUEST]; 612 struct bio *biolist[BLKIF_MAX_SEGMENTS_PER_REQUEST];
616 int i, nbio = 0; 613 int i, nbio = 0;
617 int operation; 614 int operation;
618 struct blk_plug plug; 615 struct blk_plug plug;
619 bool drain = false; 616 bool drain = false;
620 617
621 switch (req->operation) { 618 switch (req->operation) {
622 case BLKIF_OP_READ: 619 case BLKIF_OP_READ:
623 blkif->st_rd_req++; 620 blkif->st_rd_req++;
624 operation = READ; 621 operation = READ;
625 break; 622 break;
626 case BLKIF_OP_WRITE: 623 case BLKIF_OP_WRITE:
627 blkif->st_wr_req++; 624 blkif->st_wr_req++;
628 operation = WRITE_ODIRECT; 625 operation = WRITE_ODIRECT;
629 break; 626 break;
630 case BLKIF_OP_WRITE_BARRIER: 627 case BLKIF_OP_WRITE_BARRIER:
631 drain = true; 628 drain = true;
632 case BLKIF_OP_FLUSH_DISKCACHE: 629 case BLKIF_OP_FLUSH_DISKCACHE:
633 blkif->st_f_req++; 630 blkif->st_f_req++;
634 operation = WRITE_FLUSH; 631 operation = WRITE_FLUSH;
635 break; 632 break;
636 case BLKIF_OP_DISCARD:
637 blkif->st_ds_req++;
638 operation = REQ_DISCARD;
639 break;
640 default: 633 default:
641 operation = 0; /* make gcc happy */ 634 operation = 0; /* make gcc happy */
642 goto fail_response; 635 goto fail_response;
643 break; 636 break;
644 } 637 }
645 638
646 /* Check that the number of segments is sane. */ 639 /* Check that the number of segments is sane. */
647 nseg = req->nr_segments; 640 nseg = req->u.rw.nr_segments;
648 if (unlikely(nseg == 0 && operation != WRITE_FLUSH && 641
649 operation != REQ_DISCARD) || 642 if (unlikely(nseg == 0 && operation != WRITE_FLUSH) ||
650 unlikely(nseg > BLKIF_MAX_SEGMENTS_PER_REQUEST)) { 643 unlikely(nseg > BLKIF_MAX_SEGMENTS_PER_REQUEST)) {
651 pr_debug(DRV_PFX "Bad number of segments in request (%d)\n", 644 pr_debug(DRV_PFX "Bad number of segments in request (%d)\n",
652 nseg); 645 nseg);
653 /* Haven't submitted any bio's yet. */ 646 /* Haven't submitted any bio's yet. */
654 goto fail_response; 647 goto fail_response;
655 } 648 }
656 649
657 preq.dev = req->handle; 650 preq.dev = req->u.rw.handle;
658 preq.sector_number = req->u.rw.sector_number; 651 preq.sector_number = req->u.rw.sector_number;
659 preq.nr_sects = 0; 652 preq.nr_sects = 0;
660 653
661 pending_req->blkif = blkif; 654 pending_req->blkif = blkif;
662 pending_req->id = req->id; 655 pending_req->id = req->u.rw.id;
663 pending_req->operation = req->operation; 656 pending_req->operation = req->operation;
664 pending_req->status = BLKIF_RSP_OKAY; 657 pending_req->status = BLKIF_RSP_OKAY;
665 pending_req->nr_pages = nseg; 658 pending_req->nr_pages = nseg;
666 659
667 for (i = 0; i < nseg; i++) { 660 for (i = 0; i < nseg; i++) {
668 seg[i].nsec = req->u.rw.seg[i].last_sect - 661 seg[i].nsec = req->u.rw.seg[i].last_sect -
669 req->u.rw.seg[i].first_sect + 1; 662 req->u.rw.seg[i].first_sect + 1;
670 if ((req->u.rw.seg[i].last_sect >= (PAGE_SIZE >> 9)) || 663 if ((req->u.rw.seg[i].last_sect >= (PAGE_SIZE >> 9)) ||
671 (req->u.rw.seg[i].last_sect < req->u.rw.seg[i].first_sect)) 664 (req->u.rw.seg[i].last_sect < req->u.rw.seg[i].first_sect))
672 goto fail_response; 665 goto fail_response;
673 preq.nr_sects += seg[i].nsec; 666 preq.nr_sects += seg[i].nsec;
674 667
675 } 668 }
676 669
677 if (xen_vbd_translate(&preq, blkif, operation) != 0) { 670 if (xen_vbd_translate(&preq, blkif, operation) != 0) {
678 pr_debug(DRV_PFX "access denied: %s of [%llu,%llu] on dev=%04x\n", 671 pr_debug(DRV_PFX "access denied: %s of [%llu,%llu] on dev=%04x\n",
679 operation == READ ? "read" : "write", 672 operation == READ ? "read" : "write",
680 preq.sector_number, 673 preq.sector_number,
681 preq.sector_number + preq.nr_sects, preq.dev); 674 preq.sector_number + preq.nr_sects, preq.dev);
682 goto fail_response; 675 goto fail_response;
683 } 676 }
684 677
685 /* 678 /*
686 * This check _MUST_ be done after xen_vbd_translate as the preq.bdev 679 * This check _MUST_ be done after xen_vbd_translate as the preq.bdev
687 * is set there. 680 * is set there.
688 */ 681 */
689 for (i = 0; i < nseg; i++) { 682 for (i = 0; i < nseg; i++) {
690 if (((int)preq.sector_number|(int)seg[i].nsec) & 683 if (((int)preq.sector_number|(int)seg[i].nsec) &
691 ((bdev_logical_block_size(preq.bdev) >> 9) - 1)) { 684 ((bdev_logical_block_size(preq.bdev) >> 9) - 1)) {
692 pr_debug(DRV_PFX "Misaligned I/O request from domain %d", 685 pr_debug(DRV_PFX "Misaligned I/O request from domain %d",
693 blkif->domid); 686 blkif->domid);
694 goto fail_response; 687 goto fail_response;
695 } 688 }
696 } 689 }
697 690
698 /* Wait on all outstanding I/O's and once that has been completed 691 /* Wait on all outstanding I/O's and once that has been completed
699 * issue the WRITE_FLUSH. 692 * issue the WRITE_FLUSH.
700 */ 693 */
701 if (drain) 694 if (drain)
702 xen_blk_drain_io(pending_req->blkif); 695 xen_blk_drain_io(pending_req->blkif);
703 696
704 /* 697 /*
705 * If we have failed at this point, we need to undo the M2P override, 698 * If we have failed at this point, we need to undo the M2P override,
706 * set gnttab_set_unmap_op on all of the grant references and perform 699 * set gnttab_set_unmap_op on all of the grant references and perform
707 * the hypercall to unmap the grants - that is all done in 700 * the hypercall to unmap the grants - that is all done in
708 * xen_blkbk_unmap. 701 * xen_blkbk_unmap.
709 */ 702 */
710 if (operation != REQ_DISCARD && xen_blkbk_map(req, pending_req, seg)) 703 if (xen_blkbk_map(req, pending_req, seg))
711 goto fail_flush; 704 goto fail_flush;
712 705
713 /* 706 /*
714 * This corresponding xen_blkif_put is done in __end_block_io_op, or 707 * This corresponding xen_blkif_put is done in __end_block_io_op, or
715 * below (in "!bio") if we are handling a BLKIF_OP_DISCARD. 708 * below (in "!bio") if we are handling a BLKIF_OP_DISCARD.
716 */ 709 */
717 xen_blkif_get(blkif); 710 xen_blkif_get(blkif);
718 711
719 for (i = 0; i < nseg; i++) { 712 for (i = 0; i < nseg; i++) {
720 while ((bio == NULL) || 713 while ((bio == NULL) ||
721 (bio_add_page(bio, 714 (bio_add_page(bio,
722 blkbk->pending_page(pending_req, i), 715 blkbk->pending_page(pending_req, i),
723 seg[i].nsec << 9, 716 seg[i].nsec << 9,
724 seg[i].buf & ~PAGE_MASK) == 0)) { 717 seg[i].buf & ~PAGE_MASK) == 0)) {
725 718
726 bio = bio_alloc(GFP_KERNEL, nseg-i); 719 bio = bio_alloc(GFP_KERNEL, nseg-i);
727 if (unlikely(bio == NULL)) 720 if (unlikely(bio == NULL))
728 goto fail_put_bio; 721 goto fail_put_bio;
729 722
730 biolist[nbio++] = bio; 723 biolist[nbio++] = bio;
731 bio->bi_bdev = preq.bdev; 724 bio->bi_bdev = preq.bdev;
732 bio->bi_private = pending_req; 725 bio->bi_private = pending_req;
733 bio->bi_end_io = end_block_io_op; 726 bio->bi_end_io = end_block_io_op;
734 bio->bi_sector = preq.sector_number; 727 bio->bi_sector = preq.sector_number;
735 } 728 }
736 729
737 preq.sector_number += seg[i].nsec; 730 preq.sector_number += seg[i].nsec;
738 } 731 }
739 732
740 /* This will be hit if the operation was a flush or discard. */ 733 /* This will be hit if the operation was a flush or discard. */
741 if (!bio) { 734 if (!bio) {
742 BUG_ON(operation != WRITE_FLUSH && operation != REQ_DISCARD); 735 BUG_ON(operation != WRITE_FLUSH);
743 736
744 if (operation == WRITE_FLUSH) { 737 bio = bio_alloc(GFP_KERNEL, 0);
745 bio = bio_alloc(GFP_KERNEL, 0); 738 if (unlikely(bio == NULL))
746 if (unlikely(bio == NULL)) 739 goto fail_put_bio;
747 goto fail_put_bio;
748 740
749 biolist[nbio++] = bio; 741 biolist[nbio++] = bio;
750 bio->bi_bdev = preq.bdev; 742 bio->bi_bdev = preq.bdev;
751 bio->bi_private = pending_req; 743 bio->bi_private = pending_req;
752 bio->bi_end_io = end_block_io_op; 744 bio->bi_end_io = end_block_io_op;
753 } else if (operation == REQ_DISCARD) {
754 xen_blk_discard(blkif, req);
755 xen_blkif_put(blkif);
756 free_req(pending_req);
757 return 0;
758 }
759 } 745 }
760 746
761 /* 747 /*
762 * We set it once, up front, so that the last submit_bio does not have to call 748 * We set it once, up front, so that the last submit_bio does not have to call
763 * atomic_inc. 749 * atomic_inc.
764 */ 750 */
765 atomic_set(&pending_req->pendcnt, nbio); 751 atomic_set(&pending_req->pendcnt, nbio);
766 752
767 /* Get a reference count for the disk queue and start sending I/O */ 753 /* Get a reference count for the disk queue and start sending I/O */
768 blk_start_plug(&plug); 754 blk_start_plug(&plug);
769 755
770 for (i = 0; i < nbio; i++) 756 for (i = 0; i < nbio; i++)
771 submit_bio(operation, biolist[i]); 757 submit_bio(operation, biolist[i]);
772 758
773 /* Let the I/Os go.. */ 759 /* Let the I/Os go.. */
774 blk_finish_plug(&plug); 760 blk_finish_plug(&plug);
775 761
776 if (operation == READ) 762 if (operation == READ)
777 blkif->st_rd_sect += preq.nr_sects; 763 blkif->st_rd_sect += preq.nr_sects;
778 else if (operation & WRITE) 764 else if (operation & WRITE)
779 blkif->st_wr_sect += preq.nr_sects; 765 blkif->st_wr_sect += preq.nr_sects;
780 766
781 return 0; 767 return 0;
782 768
783 fail_flush: 769 fail_flush:
784 xen_blkbk_unmap(pending_req); 770 xen_blkbk_unmap(pending_req);
785 fail_response: 771 fail_response:
786 /* Haven't submitted any bio's yet. */ 772 /* Haven't submitted any bio's yet. */
787 make_response(blkif, req->id, req->operation, BLKIF_RSP_ERROR); 773 make_response(blkif, req->u.rw.id, req->operation, BLKIF_RSP_ERROR);
788 free_req(pending_req); 774 free_req(pending_req);
789 msleep(1); /* back off a bit */ 775 msleep(1); /* back off a bit */
790 return -EIO; 776 return -EIO;
791 777
792 fail_put_bio: 778 fail_put_bio:
793 for (i = 0; i < nbio; i++) 779 for (i = 0; i < nbio; i++)
794 bio_put(biolist[i]); 780 bio_put(biolist[i]);
795 __end_block_io_op(pending_req, -EINVAL); 781 __end_block_io_op(pending_req, -EINVAL);
796 msleep(1); /* back off a bit */ 782 msleep(1); /* back off a bit */
797 return -EIO; 783 return -EIO;
798 } 784 }
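
The submission loop in dispatch_rw_block_io() packs consecutive segments into as few bios as possible: it keeps feeding bio_add_page() until the current bio refuses the page, then allocates a fresh bio sized for the segments that remain. The same greedy packing modelled in plain C with a fixed per-batch capacity (the capacity, names and values are illustrative):

#include <stdio.h>

#define BATCH_CAP 3	/* stand-in for how much one bio will accept */

struct batch {
	int items[BATCH_CAP];
	int n;
};

/* Mirrors the "while (!bio || bio_add_page(...) == 0) open a new bio" loop. */
static int pack(const int *segs, int nseg, struct batch *out, int max_batches)
{
	struct batch *cur = NULL;
	int used = 0;

	for (int i = 0; i < nseg; i++) {
		while (cur == NULL || cur->n == BATCH_CAP) {	/* current batch full? */
			if (used == max_batches)
				return -1;			/* like fail_put_bio */
			cur = &out[used++];
			cur->n = 0;				/* "bio_alloc" */
		}
		cur->items[cur->n++] = segs[i];			/* "bio_add_page" */
	}
	return used;						/* batches actually built */
}

int main(void)
{
	int segs[] = { 0, 1, 2, 3, 4, 5, 6 };
	struct batch b[4];

	printf("packed into %d batches\n", pack(segs, 7, b, 4));	/* prints 3 */
	return 0;
}
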
799 785
800 786
801 787
802 /* 788 /*
803 * Put a response on the ring on how the operation fared. 789 * Put a response on the ring on how the operation fared.
804 */ 790 */
805 static void make_response(struct xen_blkif *blkif, u64 id, 791 static void make_response(struct xen_blkif *blkif, u64 id,
806 unsigned short op, int st) 792 unsigned short op, int st)
807 { 793 {
808 struct blkif_response resp; 794 struct blkif_response resp;
809 unsigned long flags; 795 unsigned long flags;
810 union blkif_back_rings *blk_rings = &blkif->blk_rings; 796 union blkif_back_rings *blk_rings = &blkif->blk_rings;
811 int notify; 797 int notify;
812 798
813 resp.id = id; 799 resp.id = id;
814 resp.operation = op; 800 resp.operation = op;
815 resp.status = st; 801 resp.status = st;
816 802
817 spin_lock_irqsave(&blkif->blk_ring_lock, flags); 803 spin_lock_irqsave(&blkif->blk_ring_lock, flags);
818 /* Place on the response ring for the relevant domain. */ 804 /* Place on the response ring for the relevant domain. */
819 switch (blkif->blk_protocol) { 805 switch (blkif->blk_protocol) {
820 case BLKIF_PROTOCOL_NATIVE: 806 case BLKIF_PROTOCOL_NATIVE:
821 memcpy(RING_GET_RESPONSE(&blk_rings->native, blk_rings->native.rsp_prod_pvt), 807 memcpy(RING_GET_RESPONSE(&blk_rings->native, blk_rings->native.rsp_prod_pvt),
822 &resp, sizeof(resp)); 808 &resp, sizeof(resp));
823 break; 809 break;
824 case BLKIF_PROTOCOL_X86_32: 810 case BLKIF_PROTOCOL_X86_32:
825 memcpy(RING_GET_RESPONSE(&blk_rings->x86_32, blk_rings->x86_32.rsp_prod_pvt), 811 memcpy(RING_GET_RESPONSE(&blk_rings->x86_32, blk_rings->x86_32.rsp_prod_pvt),
826 &resp, sizeof(resp)); 812 &resp, sizeof(resp));
827 break; 813 break;
828 case BLKIF_PROTOCOL_X86_64: 814 case BLKIF_PROTOCOL_X86_64:
829 memcpy(RING_GET_RESPONSE(&blk_rings->x86_64, blk_rings->x86_64.rsp_prod_pvt), 815 memcpy(RING_GET_RESPONSE(&blk_rings->x86_64, blk_rings->x86_64.rsp_prod_pvt),
830 &resp, sizeof(resp)); 816 &resp, sizeof(resp));
831 break; 817 break;
832 default: 818 default:
833 BUG(); 819 BUG();
834 } 820 }
835 blk_rings->common.rsp_prod_pvt++; 821 blk_rings->common.rsp_prod_pvt++;
836 RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&blk_rings->common, notify); 822 RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&blk_rings->common, notify);
837 spin_unlock_irqrestore(&blkif->blk_ring_lock, flags); 823 spin_unlock_irqrestore(&blkif->blk_ring_lock, flags);
838 if (notify) 824 if (notify)
839 notify_remote_via_irq(blkif->irq); 825 notify_remote_via_irq(blkif->irq);
840 } 826 }
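
make_response() fills a response, copies it into the slot at rsp_prod_pvt for whichever guest ABI the ring speaks, advances the private producer index, and raises the event channel only when the push macro reports that the frontend may need the interrupt. A toy single-ABI producer that shows the free-running index and mask arithmetic (ring size and types are invented for the example; the real macros also insert memory barriers):

#include <stdint.h>

#define RING_SIZE 32	/* toy power-of-two ring */

struct resp {
	uint64_t id;
	int16_t  status;
};

struct ring {
	struct resp slots[RING_SIZE];
	uint32_t rsp_prod;	/* index shared with the consumer */
	uint32_t rsp_prod_pvt;	/* producer's private index */
};

/* Queue one response; indices are free-running counters, so the slot is
 * selected by masking rather than by resetting the index at the end. */
static void push_response(struct ring *r, const struct resp *rsp)
{
	r->slots[r->rsp_prod_pvt & (RING_SIZE - 1)] = *rsp;
	r->rsp_prod_pvt++;
	r->rsp_prod = r->rsp_prod_pvt;	/* publish; real code adds a write barrier */
}
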
841 827
842 static int __init xen_blkif_init(void) 828 static int __init xen_blkif_init(void)
843 { 829 {
844 int i, mmap_pages; 830 int i, mmap_pages;
845 int rc = 0; 831 int rc = 0;
846 832
847 if (!xen_pv_domain()) 833 if (!xen_pv_domain())
848 return -ENODEV; 834 return -ENODEV;
849 835
850 blkbk = kzalloc(sizeof(struct xen_blkbk), GFP_KERNEL); 836 blkbk = kzalloc(sizeof(struct xen_blkbk), GFP_KERNEL);
851 if (!blkbk) { 837 if (!blkbk) {
852 pr_alert(DRV_PFX "%s: out of memory!\n", __func__); 838 pr_alert(DRV_PFX "%s: out of memory!\n", __func__);
853 return -ENOMEM; 839 return -ENOMEM;
854 } 840 }
855 841
856 mmap_pages = xen_blkif_reqs * BLKIF_MAX_SEGMENTS_PER_REQUEST; 842 mmap_pages = xen_blkif_reqs * BLKIF_MAX_SEGMENTS_PER_REQUEST;
857 843
858 blkbk->pending_reqs = kzalloc(sizeof(blkbk->pending_reqs[0]) * 844 blkbk->pending_reqs = kzalloc(sizeof(blkbk->pending_reqs[0]) *
859 xen_blkif_reqs, GFP_KERNEL); 845 xen_blkif_reqs, GFP_KERNEL);
860 blkbk->pending_grant_handles = kmalloc(sizeof(blkbk->pending_grant_handles[0]) * 846 blkbk->pending_grant_handles = kmalloc(sizeof(blkbk->pending_grant_handles[0]) *
861 mmap_pages, GFP_KERNEL); 847 mmap_pages, GFP_KERNEL);
862 blkbk->pending_pages = kzalloc(sizeof(blkbk->pending_pages[0]) * 848 blkbk->pending_pages = kzalloc(sizeof(blkbk->pending_pages[0]) *
863 mmap_pages, GFP_KERNEL); 849 mmap_pages, GFP_KERNEL);
864 850
865 if (!blkbk->pending_reqs || !blkbk->pending_grant_handles || 851 if (!blkbk->pending_reqs || !blkbk->pending_grant_handles ||
866 !blkbk->pending_pages) { 852 !blkbk->pending_pages) {
867 rc = -ENOMEM; 853 rc = -ENOMEM;
868 goto out_of_memory; 854 goto out_of_memory;
869 } 855 }
870 856
871 for (i = 0; i < mmap_pages; i++) { 857 for (i = 0; i < mmap_pages; i++) {
872 blkbk->pending_grant_handles[i] = BLKBACK_INVALID_HANDLE; 858 blkbk->pending_grant_handles[i] = BLKBACK_INVALID_HANDLE;
873 blkbk->pending_pages[i] = alloc_page(GFP_KERNEL); 859 blkbk->pending_pages[i] = alloc_page(GFP_KERNEL);
874 if (blkbk->pending_pages[i] == NULL) { 860 if (blkbk->pending_pages[i] == NULL) {
875 rc = -ENOMEM; 861 rc = -ENOMEM;
876 goto out_of_memory; 862 goto out_of_memory;
877 } 863 }
878 } 864 }
879 rc = xen_blkif_interface_init(); 865 rc = xen_blkif_interface_init();
880 if (rc) 866 if (rc)
881 goto failed_init; 867 goto failed_init;
882 868
883 INIT_LIST_HEAD(&blkbk->pending_free); 869 INIT_LIST_HEAD(&blkbk->pending_free);
884 spin_lock_init(&blkbk->pending_free_lock); 870 spin_lock_init(&blkbk->pending_free_lock);
885 init_waitqueue_head(&blkbk->pending_free_wq); 871 init_waitqueue_head(&blkbk->pending_free_wq);
886 872
887 for (i = 0; i < xen_blkif_reqs; i++) 873 for (i = 0; i < xen_blkif_reqs; i++)
888 list_add_tail(&blkbk->pending_reqs[i].free_list, 874 list_add_tail(&blkbk->pending_reqs[i].free_list,
889 &blkbk->pending_free); 875 &blkbk->pending_free);
890 876
891 rc = xen_blkif_xenbus_init(); 877 rc = xen_blkif_xenbus_init();
892 if (rc) 878 if (rc)
893 goto failed_init; 879 goto failed_init;
894 880
895 return 0; 881 return 0;
896 882
897 out_of_memory: 883 out_of_memory:
898 pr_alert(DRV_PFX "%s: out of memory\n", __func__); 884 pr_alert(DRV_PFX "%s: out of memory\n", __func__);
899 failed_init: 885 failed_init:
900 kfree(blkbk->pending_reqs); 886 kfree(blkbk->pending_reqs);
901 kfree(blkbk->pending_grant_handles); 887 kfree(blkbk->pending_grant_handles);
902 if (blkbk->pending_pages) { 888 if (blkbk->pending_pages) {
903 for (i = 0; i < mmap_pages; i++) { 889 for (i = 0; i < mmap_pages; i++) {
904 if (blkbk->pending_pages[i]) 890 if (blkbk->pending_pages[i])
905 __free_page(blkbk->pending_pages[i]); 891 __free_page(blkbk->pending_pages[i]);
906 } 892 }
drivers/block/xen-blkback/common.h
1 /* 1 /*
2 * This program is free software; you can redistribute it and/or 2 * This program is free software; you can redistribute it and/or
3 * modify it under the terms of the GNU General Public License version 2 3 * modify it under the terms of the GNU General Public License version 2
4 * as published by the Free Software Foundation; or, when distributed 4 * as published by the Free Software Foundation; or, when distributed
5 * separately from the Linux kernel or incorporated into other 5 * separately from the Linux kernel or incorporated into other
6 * software packages, subject to the following license: 6 * software packages, subject to the following license:
7 * 7 *
8 * Permission is hereby granted, free of charge, to any person obtaining a copy 8 * Permission is hereby granted, free of charge, to any person obtaining a copy
9 * of this source file (the "Software"), to deal in the Software without 9 * of this source file (the "Software"), to deal in the Software without
10 * restriction, including without limitation the rights to use, copy, modify, 10 * restriction, including without limitation the rights to use, copy, modify,
11 * merge, publish, distribute, sublicense, and/or sell copies of the Software, 11 * merge, publish, distribute, sublicense, and/or sell copies of the Software,
12 * and to permit persons to whom the Software is furnished to do so, subject to 12 * and to permit persons to whom the Software is furnished to do so, subject to
13 * the following conditions: 13 * the following conditions:
14 * 14 *
15 * The above copyright notice and this permission notice shall be included in 15 * The above copyright notice and this permission notice shall be included in
16 * all copies or substantial portions of the Software. 16 * all copies or substantial portions of the Software.
17 * 17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 20 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
21 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 22 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
23 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 23 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
24 * IN THE SOFTWARE. 24 * IN THE SOFTWARE.
25 */ 25 */
26 26
27 #ifndef __XEN_BLKIF__BACKEND__COMMON_H__ 27 #ifndef __XEN_BLKIF__BACKEND__COMMON_H__
28 #define __XEN_BLKIF__BACKEND__COMMON_H__ 28 #define __XEN_BLKIF__BACKEND__COMMON_H__
29 29
30 #include <linux/module.h> 30 #include <linux/module.h>
31 #include <linux/interrupt.h> 31 #include <linux/interrupt.h>
32 #include <linux/slab.h> 32 #include <linux/slab.h>
33 #include <linux/blkdev.h> 33 #include <linux/blkdev.h>
34 #include <linux/vmalloc.h> 34 #include <linux/vmalloc.h>
35 #include <linux/wait.h> 35 #include <linux/wait.h>
36 #include <linux/io.h> 36 #include <linux/io.h>
37 #include <asm/setup.h> 37 #include <asm/setup.h>
38 #include <asm/pgalloc.h> 38 #include <asm/pgalloc.h>
39 #include <asm/hypervisor.h> 39 #include <asm/hypervisor.h>
40 #include <xen/grant_table.h> 40 #include <xen/grant_table.h>
41 #include <xen/xenbus.h> 41 #include <xen/xenbus.h>
42 #include <xen/interface/io/ring.h> 42 #include <xen/interface/io/ring.h>
43 #include <xen/interface/io/blkif.h> 43 #include <xen/interface/io/blkif.h>
44 #include <xen/interface/io/protocols.h> 44 #include <xen/interface/io/protocols.h>
45 45
46 #define DRV_PFX "xen-blkback:" 46 #define DRV_PFX "xen-blkback:"
47 #define DPRINTK(fmt, args...) \ 47 #define DPRINTK(fmt, args...) \
48 pr_debug(DRV_PFX "(%s:%d) " fmt ".\n", \ 48 pr_debug(DRV_PFX "(%s:%d) " fmt ".\n", \
49 __func__, __LINE__, ##args) 49 __func__, __LINE__, ##args)
50 50
51 51
52 /* Not a real protocol. Used to generate ring structs which contain 52 /* Not a real protocol. Used to generate ring structs which contain
53 * the elements common to all protocols only. This way we get a 53 * the elements common to all protocols only. This way we get a
54 * compiler-checkable way to use common struct elements, so we can 54 * compiler-checkable way to use common struct elements, so we can
55 * avoid using switch(protocol) in a number of places. */ 55 * avoid using switch(protocol) in a number of places. */
56 struct blkif_common_request { 56 struct blkif_common_request {
57 char dummy; 57 char dummy;
58 }; 58 };
59 struct blkif_common_response { 59 struct blkif_common_response {
60 char dummy; 60 char dummy;
61 }; 61 };
62 62
63 /* i386 protocol version */
64 #pragma pack(push, 4)
65
66 struct blkif_x86_32_request_rw { 63 struct blkif_x86_32_request_rw {
64 uint8_t nr_segments; /* number of segments */
65 blkif_vdev_t handle; /* only for read/write requests */
66 uint64_t id; /* private guest value, echoed in resp */
67 blkif_sector_t sector_number;/* start sector idx on disk (r/w only) */ 67 blkif_sector_t sector_number;/* start sector idx on disk (r/w only) */
68 struct blkif_request_segment seg[BLKIF_MAX_SEGMENTS_PER_REQUEST]; 68 struct blkif_request_segment seg[BLKIF_MAX_SEGMENTS_PER_REQUEST];
69 }; 69 } __attribute__((__packed__));
70 70
71 struct blkif_x86_32_request_discard { 71 struct blkif_x86_32_request_discard {
72 uint8_t flag; /* BLKIF_DISCARD_SECURE or zero */
73 blkif_vdev_t _pad1; /* was "handle" for read/write requests */
74 uint64_t id; /* private guest value, echoed in resp */
72 blkif_sector_t sector_number;/* start sector idx on disk (r/w only) */ 75 blkif_sector_t sector_number;/* start sector idx on disk (r/w only) */
73 uint64_t nr_sectors; 76 uint64_t nr_sectors;
74 }; 77 } __attribute__((__packed__));
75 78
76 struct blkif_x86_32_request { 79 struct blkif_x86_32_request {
77 uint8_t operation; /* BLKIF_OP_??? */ 80 uint8_t operation; /* BLKIF_OP_??? */
78 uint8_t nr_segments; /* number of segments */
79 blkif_vdev_t handle; /* only for read/write requests */
80 uint64_t id; /* private guest value, echoed in resp */
81 union { 81 union {
82 struct blkif_x86_32_request_rw rw; 82 struct blkif_x86_32_request_rw rw;
83 struct blkif_x86_32_request_discard discard; 83 struct blkif_x86_32_request_discard discard;
84 } u; 84 } u;
85 }; 85 } __attribute__((__packed__));
86
87 /* i386 protocol version */
88 #pragma pack(push, 4)
86 struct blkif_x86_32_response { 89 struct blkif_x86_32_response {
87 uint64_t id; /* copied from request */ 90 uint64_t id; /* copied from request */
88 uint8_t operation; /* copied from request */ 91 uint8_t operation; /* copied from request */
89 int16_t status; /* BLKIF_RSP_??? */ 92 int16_t status; /* BLKIF_RSP_??? */
90 }; 93 };
91 #pragma pack(pop) 94 #pragma pack(pop)
92
93 /* x86_64 protocol version */ 95 /* x86_64 protocol version */
94 96
95 struct blkif_x86_64_request_rw { 97 struct blkif_x86_64_request_rw {
98 uint8_t nr_segments; /* number of segments */
99 blkif_vdev_t handle; /* only for read/write requests */
100 uint32_t _pad1; /* offsetof(blkif_reqest..,u.rw.id)==8 */
101 uint64_t id;
96 blkif_sector_t sector_number;/* start sector idx on disk (r/w only) */ 102 blkif_sector_t sector_number;/* start sector idx on disk (r/w only) */
97 struct blkif_request_segment seg[BLKIF_MAX_SEGMENTS_PER_REQUEST]; 103 struct blkif_request_segment seg[BLKIF_MAX_SEGMENTS_PER_REQUEST];
98 }; 104 } __attribute__((__packed__));
99 105
100 struct blkif_x86_64_request_discard { 106 struct blkif_x86_64_request_discard {
107 uint8_t flag; /* BLKIF_DISCARD_SECURE or zero */
108 blkif_vdev_t _pad1; /* was "handle" for read/write requests */
109 uint32_t _pad2; /* offsetof(blkif_..,u.discard.id)==8 */
110 uint64_t id;
101 blkif_sector_t sector_number;/* start sector idx on disk (r/w only) */ 111 blkif_sector_t sector_number;/* start sector idx on disk (r/w only) */
102 uint64_t nr_sectors; 112 uint64_t nr_sectors;
103 }; 113 } __attribute__((__packed__));
104 114
105 struct blkif_x86_64_request { 115 struct blkif_x86_64_request {
106 uint8_t operation; /* BLKIF_OP_??? */ 116 uint8_t operation; /* BLKIF_OP_??? */
107 uint8_t nr_segments; /* number of segments */
108 blkif_vdev_t handle; /* only for read/write requests */
109 uint64_t __attribute__((__aligned__(8))) id;
110 union { 117 union {
111 struct blkif_x86_64_request_rw rw; 118 struct blkif_x86_64_request_rw rw;
112 struct blkif_x86_64_request_discard discard; 119 struct blkif_x86_64_request_discard discard;
113 } u; 120 } u;
114 }; 121 } __attribute__((__packed__));
122
115 struct blkif_x86_64_response { 123 struct blkif_x86_64_response {
116 uint64_t __attribute__((__aligned__(8))) id; 124 uint64_t __attribute__((__aligned__(8))) id;
117 uint8_t operation; /* copied from request */ 125 uint8_t operation; /* copied from request */
118 int16_t status; /* BLKIF_RSP_??? */ 126 int16_t status; /* BLKIF_RSP_??? */
119 }; 127 };
120 128
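
The new request layouts add explicit _pad fields and mark every struct __packed__ so that the id member lands at byte offset 8 in both the 32-bit and the 64-bit guest ABI, whichever union member is live; the backend can then pick up the id before it knows how to interpret the rest of the request. A reduced, self-contained illustration of the technique (the structs below are simplified stand-ins, not the real blkif layout):

#include <stdint.h>
#include <stddef.h>
#include <stdio.h>

struct demo_rw {
	uint8_t  nr_segments;
	uint16_t handle;	/* plays the role of blkif_vdev_t */
	uint32_t _pad1;		/* pads the union member so id sits 7 bytes in */
	uint64_t id;
	uint64_t sector_number;
} __attribute__((__packed__));

struct demo_discard {
	uint8_t  flag;
	uint16_t _pad1;
	uint32_t _pad2;
	uint64_t id;
	uint64_t sector_number;
} __attribute__((__packed__));

struct demo_request {
	uint8_t operation;	/* one byte of opcode ahead of the union */
	union {
		struct demo_rw      rw;
		struct demo_discard discard;
	} u;
} __attribute__((__packed__));

int main(void)
{
	/* Both views agree on where id lives: offset 8 in the request. */
	printf("u.rw.id at %zu, u.discard.id at %zu\n",
	       offsetof(struct demo_request, u.rw.id),
	       offsetof(struct demo_request, u.discard.id));
	return 0;
}
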
121 DEFINE_RING_TYPES(blkif_common, struct blkif_common_request, 129 DEFINE_RING_TYPES(blkif_common, struct blkif_common_request,
122 struct blkif_common_response); 130 struct blkif_common_response);
123 DEFINE_RING_TYPES(blkif_x86_32, struct blkif_x86_32_request, 131 DEFINE_RING_TYPES(blkif_x86_32, struct blkif_x86_32_request,
124 struct blkif_x86_32_response); 132 struct blkif_x86_32_response);
125 DEFINE_RING_TYPES(blkif_x86_64, struct blkif_x86_64_request, 133 DEFINE_RING_TYPES(blkif_x86_64, struct blkif_x86_64_request,
126 struct blkif_x86_64_response); 134 struct blkif_x86_64_response);
127 135
128 union blkif_back_rings { 136 union blkif_back_rings {
129 struct blkif_back_ring native; 137 struct blkif_back_ring native;
130 struct blkif_common_back_ring common; 138 struct blkif_common_back_ring common;
131 struct blkif_x86_32_back_ring x86_32; 139 struct blkif_x86_32_back_ring x86_32;
132 struct blkif_x86_64_back_ring x86_64; 140 struct blkif_x86_64_back_ring x86_64;
133 }; 141 };
134 142
135 enum blkif_protocol { 143 enum blkif_protocol {
136 BLKIF_PROTOCOL_NATIVE = 1, 144 BLKIF_PROTOCOL_NATIVE = 1,
137 BLKIF_PROTOCOL_X86_32 = 2, 145 BLKIF_PROTOCOL_X86_32 = 2,
138 BLKIF_PROTOCOL_X86_64 = 3, 146 BLKIF_PROTOCOL_X86_64 = 3,
139 }; 147 };
140 148
141 enum blkif_backend_type { 149 enum blkif_backend_type {
142 BLKIF_BACKEND_PHY = 1, 150 BLKIF_BACKEND_PHY = 1,
143 BLKIF_BACKEND_FILE = 2, 151 BLKIF_BACKEND_FILE = 2,
144 }; 152 };
145 153
146 struct xen_vbd { 154 struct xen_vbd {
147 /* What the domain refers to this vbd as. */ 155 /* What the domain refers to this vbd as. */
148 blkif_vdev_t handle; 156 blkif_vdev_t handle;
149 /* Non-zero -> read-only */ 157 /* Non-zero -> read-only */
150 unsigned char readonly; 158 unsigned char readonly;
151 /* VDISK_xxx */ 159 /* VDISK_xxx */
152 unsigned char type; 160 unsigned char type;
153 /* phys device that this vbd maps to. */ 161 /* phys device that this vbd maps to. */
154 u32 pdevice; 162 u32 pdevice;
155 struct block_device *bdev; 163 struct block_device *bdev;
156 /* Cached size parameter. */ 164 /* Cached size parameter. */
157 sector_t size; 165 sector_t size;
158 bool flush_support; 166 bool flush_support;
167 bool discard_secure;
159 }; 168 };
160 169
161 struct backend_info; 170 struct backend_info;
162 171
163 struct xen_blkif { 172 struct xen_blkif {
164 /* Unique identifier for this interface. */ 173 /* Unique identifier for this interface. */
165 domid_t domid; 174 domid_t domid;
166 unsigned int handle; 175 unsigned int handle;
167 /* Physical parameters of the comms window. */ 176 /* Physical parameters of the comms window. */
168 unsigned int irq; 177 unsigned int irq;
169 /* Comms information. */ 178 /* Comms information. */
170 enum blkif_protocol blk_protocol; 179 enum blkif_protocol blk_protocol;
171 enum blkif_backend_type blk_backend_type; 180 enum blkif_backend_type blk_backend_type;
172 union blkif_back_rings blk_rings; 181 union blkif_back_rings blk_rings;
173 void *blk_ring; 182 void *blk_ring;
174 /* The VBD attached to this interface. */ 183 /* The VBD attached to this interface. */
175 struct xen_vbd vbd; 184 struct xen_vbd vbd;
176 /* Back pointer to the backend_info. */ 185 /* Back pointer to the backend_info. */
177 struct backend_info *be; 186 struct backend_info *be;
178 /* Private fields. */ 187 /* Private fields. */
179 spinlock_t blk_ring_lock; 188 spinlock_t blk_ring_lock;
180 atomic_t refcnt; 189 atomic_t refcnt;
181 190
182 wait_queue_head_t wq; 191 wait_queue_head_t wq;
183 /* for barrier (drain) requests */ 192 /* for barrier (drain) requests */
184 struct completion drain_complete; 193 struct completion drain_complete;
185 atomic_t drain; 194 atomic_t drain;
186 /* One thread per one blkif. */ 195 /* One thread per one blkif. */
187 struct task_struct *xenblkd; 196 struct task_struct *xenblkd;
188 unsigned int waiting_reqs; 197 unsigned int waiting_reqs;
189 198
190 /* statistics */ 199 /* statistics */
191 unsigned long st_print; 200 unsigned long st_print;
192 int st_rd_req; 201 int st_rd_req;
193 int st_wr_req; 202 int st_wr_req;
194 int st_oo_req; 203 int st_oo_req;
195 int st_f_req; 204 int st_f_req;
196 int st_ds_req; 205 int st_ds_req;
197 int st_rd_sect; 206 int st_rd_sect;
198 int st_wr_sect; 207 int st_wr_sect;
199 208
200 wait_queue_head_t waiting_to_free; 209 wait_queue_head_t waiting_to_free;
201 }; 210 };
202 211
203 212
204 #define vbd_sz(_v) ((_v)->bdev->bd_part ? \ 213 #define vbd_sz(_v) ((_v)->bdev->bd_part ? \
205 (_v)->bdev->bd_part->nr_sects : \ 214 (_v)->bdev->bd_part->nr_sects : \
206 get_capacity((_v)->bdev->bd_disk)) 215 get_capacity((_v)->bdev->bd_disk))
207 216
208 #define xen_blkif_get(_b) (atomic_inc(&(_b)->refcnt)) 217 #define xen_blkif_get(_b) (atomic_inc(&(_b)->refcnt))
209 #define xen_blkif_put(_b) \ 218 #define xen_blkif_put(_b) \
210 do { \ 219 do { \
211 if (atomic_dec_and_test(&(_b)->refcnt)) \ 220 if (atomic_dec_and_test(&(_b)->refcnt)) \
212 wake_up(&(_b)->waiting_to_free);\ 221 wake_up(&(_b)->waiting_to_free);\
213 } while (0) 222 } while (0)
214 223
215 struct phys_req { 224 struct phys_req {
216 unsigned short dev; 225 unsigned short dev;
217 blkif_sector_t nr_sects; 226 blkif_sector_t nr_sects;
218 struct block_device *bdev; 227 struct block_device *bdev;
219 blkif_sector_t sector_number; 228 blkif_sector_t sector_number;
220 }; 229 };
221 int xen_blkif_interface_init(void); 230 int xen_blkif_interface_init(void);
222 231
223 int xen_blkif_xenbus_init(void); 232 int xen_blkif_xenbus_init(void);
224 233
225 irqreturn_t xen_blkif_be_int(int irq, void *dev_id); 234 irqreturn_t xen_blkif_be_int(int irq, void *dev_id);
226 int xen_blkif_schedule(void *arg); 235 int xen_blkif_schedule(void *arg);
227 236
228 int xen_blkbk_flush_diskcache(struct xenbus_transaction xbt, 237 int xen_blkbk_flush_diskcache(struct xenbus_transaction xbt,
229 struct backend_info *be, int state); 238 struct backend_info *be, int state);
230 239
231 int xen_blkbk_barrier(struct xenbus_transaction xbt, 240 int xen_blkbk_barrier(struct xenbus_transaction xbt,
232 struct backend_info *be, int state); 241 struct backend_info *be, int state);
233 struct xenbus_device *xen_blkbk_xenbus(struct backend_info *be); 242 struct xenbus_device *xen_blkbk_xenbus(struct backend_info *be);
234 243
235 static inline void blkif_get_x86_32_req(struct blkif_request *dst, 244 static inline void blkif_get_x86_32_req(struct blkif_request *dst,
236 struct blkif_x86_32_request *src) 245 struct blkif_x86_32_request *src)
237 { 246 {
238 int i, n = BLKIF_MAX_SEGMENTS_PER_REQUEST; 247 int i, n = BLKIF_MAX_SEGMENTS_PER_REQUEST;
239 dst->operation = src->operation; 248 dst->operation = src->operation;
240 dst->nr_segments = src->nr_segments;
241 dst->handle = src->handle;
242 dst->id = src->id;
243 switch (src->operation) { 249 switch (src->operation) {
244 case BLKIF_OP_READ: 250 case BLKIF_OP_READ:
245 case BLKIF_OP_WRITE: 251 case BLKIF_OP_WRITE:
246 case BLKIF_OP_WRITE_BARRIER: 252 case BLKIF_OP_WRITE_BARRIER:
247 case BLKIF_OP_FLUSH_DISKCACHE: 253 case BLKIF_OP_FLUSH_DISKCACHE:
254 dst->u.rw.nr_segments = src->u.rw.nr_segments;
255 dst->u.rw.handle = src->u.rw.handle;
256 dst->u.rw.id = src->u.rw.id;
248 dst->u.rw.sector_number = src->u.rw.sector_number; 257 dst->u.rw.sector_number = src->u.rw.sector_number;
249 barrier(); 258 barrier();
250 if (n > dst->nr_segments) 259 if (n > dst->u.rw.nr_segments)
251 n = dst->nr_segments; 260 n = dst->u.rw.nr_segments;
252 for (i = 0; i < n; i++) 261 for (i = 0; i < n; i++)
253 dst->u.rw.seg[i] = src->u.rw.seg[i]; 262 dst->u.rw.seg[i] = src->u.rw.seg[i];
254 break; 263 break;
255 case BLKIF_OP_DISCARD: 264 case BLKIF_OP_DISCARD:
265 dst->u.discard.flag = src->u.discard.flag;
256 dst->u.discard.sector_number = src->u.discard.sector_number; 266 dst->u.discard.sector_number = src->u.discard.sector_number;
257 dst->u.discard.nr_sectors = src->u.discard.nr_sectors; 267 dst->u.discard.nr_sectors = src->u.discard.nr_sectors;
258 break; 268 break;
259 default: 269 default:
260 break; 270 break;
261 } 271 }
262 } 272 }
263 273
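/*
 * Editorial sketch (field order and padding simplified; not the verbatim
 * ABI header): the copy helper above, and its x86_64 twin below, assume
 * the squashed request layout in which id/handle/nr_segments live inside
 * the per-operation union member instead of the common header, so they
 * are only copied under the case labels of the operations that actually
 * define them.  Types are the <stdint.h> fixed-width integers.
 */
struct sketch_blkif_request {
	uint8_t operation;                  /* BLKIF_OP_* */
	union {
		struct {                    /* read/write/barrier/flush */
			uint8_t  nr_segments;
			uint16_t handle;
			uint64_t id;
			uint64_t sector_number;
			/* followed by the segment descriptors */
		} rw;
		struct {                    /* discard / secure erase */
			uint8_t  flag;
			uint64_t sector_number;
			uint64_t nr_sectors;
		} discard;
	} u;
};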
264 static inline void blkif_get_x86_64_req(struct blkif_request *dst, 274 static inline void blkif_get_x86_64_req(struct blkif_request *dst,
265 struct blkif_x86_64_request *src) 275 struct blkif_x86_64_request *src)
266 { 276 {
267 int i, n = BLKIF_MAX_SEGMENTS_PER_REQUEST; 277 int i, n = BLKIF_MAX_SEGMENTS_PER_REQUEST;
268 dst->operation = src->operation; 278 dst->operation = src->operation;
269 dst->nr_segments = src->nr_segments;
270 dst->handle = src->handle;
271 dst->id = src->id;
272 switch (src->operation) { 279 switch (src->operation) {
273 case BLKIF_OP_READ: 280 case BLKIF_OP_READ:
274 case BLKIF_OP_WRITE: 281 case BLKIF_OP_WRITE:
275 case BLKIF_OP_WRITE_BARRIER: 282 case BLKIF_OP_WRITE_BARRIER:
276 case BLKIF_OP_FLUSH_DISKCACHE: 283 case BLKIF_OP_FLUSH_DISKCACHE:
284 dst->u.rw.nr_segments = src->u.rw.nr_segments;
285 dst->u.rw.handle = src->u.rw.handle;
286 dst->u.rw.id = src->u.rw.id;
277 dst->u.rw.sector_number = src->u.rw.sector_number; 287 dst->u.rw.sector_number = src->u.rw.sector_number;
278 barrier(); 288 barrier();
279 if (n > dst->nr_segments) 289 if (n > dst->u.rw.nr_segments)
drivers/block/xen-blkback/xenbus.c
1 /* Xenbus code for blkif backend 1 /* Xenbus code for blkif backend
2 Copyright (C) 2005 Rusty Russell <rusty@rustcorp.com.au> 2 Copyright (C) 2005 Rusty Russell <rusty@rustcorp.com.au>
3 Copyright (C) 2005 XenSource Ltd 3 Copyright (C) 2005 XenSource Ltd
4 4
5 This program is free software; you can redistribute it and/or modify 5 This program is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by 6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 2 of the License, or 7 the Free Software Foundation; either version 2 of the License, or
8 (at your option) any later version. 8 (at your option) any later version.
9 9
10 This program is distributed in the hope that it will be useful, 10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of 11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details. 13 GNU General Public License for more details.
14 14
15 */ 15 */
16 16
17 #include <stdarg.h> 17 #include <stdarg.h>
18 #include <linux/module.h> 18 #include <linux/module.h>
19 #include <linux/kthread.h> 19 #include <linux/kthread.h>
20 #include <xen/events.h> 20 #include <xen/events.h>
21 #include <xen/grant_table.h> 21 #include <xen/grant_table.h>
22 #include "common.h" 22 #include "common.h"
23 23
24 struct backend_info { 24 struct backend_info {
25 struct xenbus_device *dev; 25 struct xenbus_device *dev;
26 struct xen_blkif *blkif; 26 struct xen_blkif *blkif;
27 struct xenbus_watch backend_watch; 27 struct xenbus_watch backend_watch;
28 unsigned major; 28 unsigned major;
29 unsigned minor; 29 unsigned minor;
30 char *mode; 30 char *mode;
31 }; 31 };
32 32
33 static struct kmem_cache *xen_blkif_cachep; 33 static struct kmem_cache *xen_blkif_cachep;
34 static void connect(struct backend_info *); 34 static void connect(struct backend_info *);
35 static int connect_ring(struct backend_info *); 35 static int connect_ring(struct backend_info *);
36 static void backend_changed(struct xenbus_watch *, const char **, 36 static void backend_changed(struct xenbus_watch *, const char **,
37 unsigned int); 37 unsigned int);
38 38
39 struct xenbus_device *xen_blkbk_xenbus(struct backend_info *be) 39 struct xenbus_device *xen_blkbk_xenbus(struct backend_info *be)
40 { 40 {
41 return be->dev; 41 return be->dev;
42 } 42 }
43 43
44 static int blkback_name(struct xen_blkif *blkif, char *buf) 44 static int blkback_name(struct xen_blkif *blkif, char *buf)
45 { 45 {
46 char *devpath, *devname; 46 char *devpath, *devname;
47 struct xenbus_device *dev = blkif->be->dev; 47 struct xenbus_device *dev = blkif->be->dev;
48 48
49 devpath = xenbus_read(XBT_NIL, dev->nodename, "dev", NULL); 49 devpath = xenbus_read(XBT_NIL, dev->nodename, "dev", NULL);
50 if (IS_ERR(devpath)) 50 if (IS_ERR(devpath))
51 return PTR_ERR(devpath); 51 return PTR_ERR(devpath);
52 52
53 devname = strstr(devpath, "/dev/"); 53 devname = strstr(devpath, "/dev/");
54 if (devname != NULL) 54 if (devname != NULL)
55 devname += strlen("/dev/"); 55 devname += strlen("/dev/");
56 else 56 else
57 devname = devpath; 57 devname = devpath;
58 58
59 snprintf(buf, TASK_COMM_LEN, "blkback.%d.%s", blkif->domid, devname); 59 snprintf(buf, TASK_COMM_LEN, "blkback.%d.%s", blkif->domid, devname);
60 kfree(devpath); 60 kfree(devpath);
61 61
62 return 0; 62 return 0;
63 } 63 }
64 64
65 static void xen_update_blkif_status(struct xen_blkif *blkif) 65 static void xen_update_blkif_status(struct xen_blkif *blkif)
66 { 66 {
67 int err; 67 int err;
68 char name[TASK_COMM_LEN]; 68 char name[TASK_COMM_LEN];
69 69
70 /* Not ready to connect? */ 70 /* Not ready to connect? */
71 if (!blkif->irq || !blkif->vbd.bdev) 71 if (!blkif->irq || !blkif->vbd.bdev)
72 return; 72 return;
73 73
74 /* Already connected? */ 74 /* Already connected? */
75 if (blkif->be->dev->state == XenbusStateConnected) 75 if (blkif->be->dev->state == XenbusStateConnected)
76 return; 76 return;
77 77
78 /* Attempt to connect: exit if we fail to. */ 78 /* Attempt to connect: exit if we fail to. */
79 connect(blkif->be); 79 connect(blkif->be);
80 if (blkif->be->dev->state != XenbusStateConnected) 80 if (blkif->be->dev->state != XenbusStateConnected)
81 return; 81 return;
82 82
83 err = blkback_name(blkif, name); 83 err = blkback_name(blkif, name);
84 if (err) { 84 if (err) {
85 xenbus_dev_error(blkif->be->dev, err, "get blkback dev name"); 85 xenbus_dev_error(blkif->be->dev, err, "get blkback dev name");
86 return; 86 return;
87 } 87 }
88 88
89 err = filemap_write_and_wait(blkif->vbd.bdev->bd_inode->i_mapping); 89 err = filemap_write_and_wait(blkif->vbd.bdev->bd_inode->i_mapping);
90 if (err) { 90 if (err) {
91 xenbus_dev_error(blkif->be->dev, err, "block flush"); 91 xenbus_dev_error(blkif->be->dev, err, "block flush");
92 return; 92 return;
93 } 93 }
94 invalidate_inode_pages2(blkif->vbd.bdev->bd_inode->i_mapping); 94 invalidate_inode_pages2(blkif->vbd.bdev->bd_inode->i_mapping);
95 95
96 blkif->xenblkd = kthread_run(xen_blkif_schedule, blkif, name); 96 blkif->xenblkd = kthread_run(xen_blkif_schedule, blkif, name);
97 if (IS_ERR(blkif->xenblkd)) { 97 if (IS_ERR(blkif->xenblkd)) {
98 err = PTR_ERR(blkif->xenblkd); 98 err = PTR_ERR(blkif->xenblkd);
99 blkif->xenblkd = NULL; 99 blkif->xenblkd = NULL;
100 xenbus_dev_error(blkif->be->dev, err, "start xenblkd"); 100 xenbus_dev_error(blkif->be->dev, err, "start xenblkd");
101 } 101 }
102 } 102 }
103 103
104 static struct xen_blkif *xen_blkif_alloc(domid_t domid) 104 static struct xen_blkif *xen_blkif_alloc(domid_t domid)
105 { 105 {
106 struct xen_blkif *blkif; 106 struct xen_blkif *blkif;
107 107
108 blkif = kmem_cache_alloc(xen_blkif_cachep, GFP_KERNEL); 108 blkif = kmem_cache_alloc(xen_blkif_cachep, GFP_KERNEL);
109 if (!blkif) 109 if (!blkif)
110 return ERR_PTR(-ENOMEM); 110 return ERR_PTR(-ENOMEM);
111 111
112 memset(blkif, 0, sizeof(*blkif)); 112 memset(blkif, 0, sizeof(*blkif));
113 blkif->domid = domid; 113 blkif->domid = domid;
114 spin_lock_init(&blkif->blk_ring_lock); 114 spin_lock_init(&blkif->blk_ring_lock);
115 atomic_set(&blkif->refcnt, 1); 115 atomic_set(&blkif->refcnt, 1);
116 init_waitqueue_head(&blkif->wq); 116 init_waitqueue_head(&blkif->wq);
117 init_completion(&blkif->drain_complete); 117 init_completion(&blkif->drain_complete);
118 atomic_set(&blkif->drain, 0); 118 atomic_set(&blkif->drain, 0);
119 blkif->st_print = jiffies; 119 blkif->st_print = jiffies;
120 init_waitqueue_head(&blkif->waiting_to_free); 120 init_waitqueue_head(&blkif->waiting_to_free);
121 121
122 return blkif; 122 return blkif;
123 } 123 }
124 124
125 static int xen_blkif_map(struct xen_blkif *blkif, unsigned long shared_page, 125 static int xen_blkif_map(struct xen_blkif *blkif, unsigned long shared_page,
126 unsigned int evtchn) 126 unsigned int evtchn)
127 { 127 {
128 int err; 128 int err;
129 129
130 /* Already connected through? */ 130 /* Already connected through? */
131 if (blkif->irq) 131 if (blkif->irq)
132 return 0; 132 return 0;
133 133
134 err = xenbus_map_ring_valloc(blkif->be->dev, shared_page, &blkif->blk_ring); 134 err = xenbus_map_ring_valloc(blkif->be->dev, shared_page, &blkif->blk_ring);
135 if (err < 0) 135 if (err < 0)
136 return err; 136 return err;
137 137
138 switch (blkif->blk_protocol) { 138 switch (blkif->blk_protocol) {
139 case BLKIF_PROTOCOL_NATIVE: 139 case BLKIF_PROTOCOL_NATIVE:
140 { 140 {
141 struct blkif_sring *sring; 141 struct blkif_sring *sring;
142 sring = (struct blkif_sring *)blkif->blk_ring; 142 sring = (struct blkif_sring *)blkif->blk_ring;
143 BACK_RING_INIT(&blkif->blk_rings.native, sring, PAGE_SIZE); 143 BACK_RING_INIT(&blkif->blk_rings.native, sring, PAGE_SIZE);
144 break; 144 break;
145 } 145 }
146 case BLKIF_PROTOCOL_X86_32: 146 case BLKIF_PROTOCOL_X86_32:
147 { 147 {
148 struct blkif_x86_32_sring *sring_x86_32; 148 struct blkif_x86_32_sring *sring_x86_32;
149 sring_x86_32 = (struct blkif_x86_32_sring *)blkif->blk_ring; 149 sring_x86_32 = (struct blkif_x86_32_sring *)blkif->blk_ring;
150 BACK_RING_INIT(&blkif->blk_rings.x86_32, sring_x86_32, PAGE_SIZE); 150 BACK_RING_INIT(&blkif->blk_rings.x86_32, sring_x86_32, PAGE_SIZE);
151 break; 151 break;
152 } 152 }
153 case BLKIF_PROTOCOL_X86_64: 153 case BLKIF_PROTOCOL_X86_64:
154 { 154 {
155 struct blkif_x86_64_sring *sring_x86_64; 155 struct blkif_x86_64_sring *sring_x86_64;
156 sring_x86_64 = (struct blkif_x86_64_sring *)blkif->blk_ring; 156 sring_x86_64 = (struct blkif_x86_64_sring *)blkif->blk_ring;
157 BACK_RING_INIT(&blkif->blk_rings.x86_64, sring_x86_64, PAGE_SIZE); 157 BACK_RING_INIT(&blkif->blk_rings.x86_64, sring_x86_64, PAGE_SIZE);
158 break; 158 break;
159 } 159 }
160 default: 160 default:
161 BUG(); 161 BUG();
162 } 162 }
163 163
164 err = bind_interdomain_evtchn_to_irqhandler(blkif->domid, evtchn, 164 err = bind_interdomain_evtchn_to_irqhandler(blkif->domid, evtchn,
165 xen_blkif_be_int, 0, 165 xen_blkif_be_int, 0,
166 "blkif-backend", blkif); 166 "blkif-backend", blkif);
167 if (err < 0) { 167 if (err < 0) {
168 xenbus_unmap_ring_vfree(blkif->be->dev, blkif->blk_ring); 168 xenbus_unmap_ring_vfree(blkif->be->dev, blkif->blk_ring);
169 blkif->blk_rings.common.sring = NULL; 169 blkif->blk_rings.common.sring = NULL;
170 return err; 170 return err;
171 } 171 }
172 blkif->irq = err; 172 blkif->irq = err;
173 173
174 return 0; 174 return 0;
175 } 175 }
176 176
177 static void xen_blkif_disconnect(struct xen_blkif *blkif) 177 static void xen_blkif_disconnect(struct xen_blkif *blkif)
178 { 178 {
179 if (blkif->xenblkd) { 179 if (blkif->xenblkd) {
180 kthread_stop(blkif->xenblkd); 180 kthread_stop(blkif->xenblkd);
181 blkif->xenblkd = NULL; 181 blkif->xenblkd = NULL;
182 } 182 }
183 183
184 atomic_dec(&blkif->refcnt); 184 atomic_dec(&blkif->refcnt);
185 wait_event(blkif->waiting_to_free, atomic_read(&blkif->refcnt) == 0); 185 wait_event(blkif->waiting_to_free, atomic_read(&blkif->refcnt) == 0);
186 atomic_inc(&blkif->refcnt); 186 atomic_inc(&blkif->refcnt);
187 187
188 if (blkif->irq) { 188 if (blkif->irq) {
189 unbind_from_irqhandler(blkif->irq, blkif); 189 unbind_from_irqhandler(blkif->irq, blkif);
190 blkif->irq = 0; 190 blkif->irq = 0;
191 } 191 }
192 192
193 if (blkif->blk_rings.common.sring) { 193 if (blkif->blk_rings.common.sring) {
194 xenbus_unmap_ring_vfree(blkif->be->dev, blkif->blk_ring); 194 xenbus_unmap_ring_vfree(blkif->be->dev, blkif->blk_ring);
195 blkif->blk_rings.common.sring = NULL; 195 blkif->blk_rings.common.sring = NULL;
196 } 196 }
197 } 197 }
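/*
 * Editorial note on the refcount dance above: xen_blkif_alloc() starts
 * refcnt at 1.  Dropping that initial reference and sleeping on
 * waiting_to_free until the count reaches 0 ensures that every in-flight
 * request (each holds a reference via xen_blkif_get()/xen_blkif_put())
 * has completed; the count is then bumped back to 1 so that
 * xen_blkif_free() below can perform the final, asserted decrement.
 */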
198 198
199 void xen_blkif_free(struct xen_blkif *blkif) 199 void xen_blkif_free(struct xen_blkif *blkif)
200 { 200 {
201 if (!atomic_dec_and_test(&blkif->refcnt)) 201 if (!atomic_dec_and_test(&blkif->refcnt))
202 BUG(); 202 BUG();
203 kmem_cache_free(xen_blkif_cachep, blkif); 203 kmem_cache_free(xen_blkif_cachep, blkif);
204 } 204 }
205 205
206 int __init xen_blkif_interface_init(void) 206 int __init xen_blkif_interface_init(void)
207 { 207 {
208 xen_blkif_cachep = kmem_cache_create("blkif_cache", 208 xen_blkif_cachep = kmem_cache_create("blkif_cache",
209 sizeof(struct xen_blkif), 209 sizeof(struct xen_blkif),
210 0, 0, NULL); 210 0, 0, NULL);
211 if (!xen_blkif_cachep) 211 if (!xen_blkif_cachep)
212 return -ENOMEM; 212 return -ENOMEM;
213 213
214 return 0; 214 return 0;
215 } 215 }
216 216
217 /* 217 /*
218 * sysfs interface for VBD I/O requests 218 * sysfs interface for VBD I/O requests
219 */ 219 */
220 220
221 #define VBD_SHOW(name, format, args...) \ 221 #define VBD_SHOW(name, format, args...) \
222 static ssize_t show_##name(struct device *_dev, \ 222 static ssize_t show_##name(struct device *_dev, \
223 struct device_attribute *attr, \ 223 struct device_attribute *attr, \
224 char *buf) \ 224 char *buf) \
225 { \ 225 { \
226 struct xenbus_device *dev = to_xenbus_device(_dev); \ 226 struct xenbus_device *dev = to_xenbus_device(_dev); \
227 struct backend_info *be = dev_get_drvdata(&dev->dev); \ 227 struct backend_info *be = dev_get_drvdata(&dev->dev); \
228 \ 228 \
229 return sprintf(buf, format, ##args); \ 229 return sprintf(buf, format, ##args); \
230 } \ 230 } \
231 static DEVICE_ATTR(name, S_IRUGO, show_##name, NULL) 231 static DEVICE_ATTR(name, S_IRUGO, show_##name, NULL)
232 232
233 VBD_SHOW(oo_req, "%d\n", be->blkif->st_oo_req); 233 VBD_SHOW(oo_req, "%d\n", be->blkif->st_oo_req);
234 VBD_SHOW(rd_req, "%d\n", be->blkif->st_rd_req); 234 VBD_SHOW(rd_req, "%d\n", be->blkif->st_rd_req);
235 VBD_SHOW(wr_req, "%d\n", be->blkif->st_wr_req); 235 VBD_SHOW(wr_req, "%d\n", be->blkif->st_wr_req);
236 VBD_SHOW(f_req, "%d\n", be->blkif->st_f_req); 236 VBD_SHOW(f_req, "%d\n", be->blkif->st_f_req);
237 VBD_SHOW(ds_req, "%d\n", be->blkif->st_ds_req); 237 VBD_SHOW(ds_req, "%d\n", be->blkif->st_ds_req);
238 VBD_SHOW(rd_sect, "%d\n", be->blkif->st_rd_sect); 238 VBD_SHOW(rd_sect, "%d\n", be->blkif->st_rd_sect);
239 VBD_SHOW(wr_sect, "%d\n", be->blkif->st_wr_sect); 239 VBD_SHOW(wr_sect, "%d\n", be->blkif->st_wr_sect);
240 240
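/*
 * Editorial note: the VBD_SHOW(rd_req, ...) invocation above expands to
 * roughly
 *
 *	static ssize_t show_rd_req(struct device *_dev,
 *				   struct device_attribute *attr, char *buf)
 *	{
 *		struct xenbus_device *dev = to_xenbus_device(_dev);
 *		struct backend_info *be = dev_get_drvdata(&dev->dev);
 *
 *		return sprintf(buf, "%d\n", be->blkif->st_rd_req);
 *	}
 *	static DEVICE_ATTR(rd_req, S_IRUGO, show_rd_req, NULL);
 *
 * which provides the read-only files grouped under "statistics" by
 * xenvbd_sysfs_addif() below.
 */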
241 static struct attribute *xen_vbdstat_attrs[] = { 241 static struct attribute *xen_vbdstat_attrs[] = {
242 &dev_attr_oo_req.attr, 242 &dev_attr_oo_req.attr,
243 &dev_attr_rd_req.attr, 243 &dev_attr_rd_req.attr,
244 &dev_attr_wr_req.attr, 244 &dev_attr_wr_req.attr,
245 &dev_attr_f_req.attr, 245 &dev_attr_f_req.attr,
246 &dev_attr_ds_req.attr, 246 &dev_attr_ds_req.attr,
247 &dev_attr_rd_sect.attr, 247 &dev_attr_rd_sect.attr,
248 &dev_attr_wr_sect.attr, 248 &dev_attr_wr_sect.attr,
249 NULL 249 NULL
250 }; 250 };
251 251
252 static struct attribute_group xen_vbdstat_group = { 252 static struct attribute_group xen_vbdstat_group = {
253 .name = "statistics", 253 .name = "statistics",
254 .attrs = xen_vbdstat_attrs, 254 .attrs = xen_vbdstat_attrs,
255 }; 255 };
256 256
257 VBD_SHOW(physical_device, "%x:%x\n", be->major, be->minor); 257 VBD_SHOW(physical_device, "%x:%x\n", be->major, be->minor);
258 VBD_SHOW(mode, "%s\n", be->mode); 258 VBD_SHOW(mode, "%s\n", be->mode);
259 259
260 int xenvbd_sysfs_addif(struct xenbus_device *dev) 260 int xenvbd_sysfs_addif(struct xenbus_device *dev)
261 { 261 {
262 int error; 262 int error;
263 263
264 error = device_create_file(&dev->dev, &dev_attr_physical_device); 264 error = device_create_file(&dev->dev, &dev_attr_physical_device);
265 if (error) 265 if (error)
266 goto fail1; 266 goto fail1;
267 267
268 error = device_create_file(&dev->dev, &dev_attr_mode); 268 error = device_create_file(&dev->dev, &dev_attr_mode);
269 if (error) 269 if (error)
270 goto fail2; 270 goto fail2;
271 271
272 error = sysfs_create_group(&dev->dev.kobj, &xen_vbdstat_group); 272 error = sysfs_create_group(&dev->dev.kobj, &xen_vbdstat_group);
273 if (error) 273 if (error)
274 goto fail3; 274 goto fail3;
275 275
276 return 0; 276 return 0;
277 277
278 fail3: sysfs_remove_group(&dev->dev.kobj, &xen_vbdstat_group); 278 fail3: sysfs_remove_group(&dev->dev.kobj, &xen_vbdstat_group);
279 fail2: device_remove_file(&dev->dev, &dev_attr_mode); 279 fail2: device_remove_file(&dev->dev, &dev_attr_mode);
280 fail1: device_remove_file(&dev->dev, &dev_attr_physical_device); 280 fail1: device_remove_file(&dev->dev, &dev_attr_physical_device);
281 return error; 281 return error;
282 } 282 }
283 283
284 void xenvbd_sysfs_delif(struct xenbus_device *dev) 284 void xenvbd_sysfs_delif(struct xenbus_device *dev)
285 { 285 {
286 sysfs_remove_group(&dev->dev.kobj, &xen_vbdstat_group); 286 sysfs_remove_group(&dev->dev.kobj, &xen_vbdstat_group);
287 device_remove_file(&dev->dev, &dev_attr_mode); 287 device_remove_file(&dev->dev, &dev_attr_mode);
288 device_remove_file(&dev->dev, &dev_attr_physical_device); 288 device_remove_file(&dev->dev, &dev_attr_physical_device);
289 } 289 }
290 290
291 291
292 static void xen_vbd_free(struct xen_vbd *vbd) 292 static void xen_vbd_free(struct xen_vbd *vbd)
293 { 293 {
294 if (vbd->bdev) 294 if (vbd->bdev)
295 blkdev_put(vbd->bdev, vbd->readonly ? FMODE_READ : FMODE_WRITE); 295 blkdev_put(vbd->bdev, vbd->readonly ? FMODE_READ : FMODE_WRITE);
296 vbd->bdev = NULL; 296 vbd->bdev = NULL;
297 } 297 }
298 298
299 static int xen_vbd_create(struct xen_blkif *blkif, blkif_vdev_t handle, 299 static int xen_vbd_create(struct xen_blkif *blkif, blkif_vdev_t handle,
300 unsigned major, unsigned minor, int readonly, 300 unsigned major, unsigned minor, int readonly,
301 int cdrom) 301 int cdrom)
302 { 302 {
303 struct xen_vbd *vbd; 303 struct xen_vbd *vbd;
304 struct block_device *bdev; 304 struct block_device *bdev;
305 struct request_queue *q; 305 struct request_queue *q;
306 306
307 vbd = &blkif->vbd; 307 vbd = &blkif->vbd;
308 vbd->handle = handle; 308 vbd->handle = handle;
309 vbd->readonly = readonly; 309 vbd->readonly = readonly;
310 vbd->type = 0; 310 vbd->type = 0;
311 311
312 vbd->pdevice = MKDEV(major, minor); 312 vbd->pdevice = MKDEV(major, minor);
313 313
314 bdev = blkdev_get_by_dev(vbd->pdevice, vbd->readonly ? 314 bdev = blkdev_get_by_dev(vbd->pdevice, vbd->readonly ?
315 FMODE_READ : FMODE_WRITE, NULL); 315 FMODE_READ : FMODE_WRITE, NULL);
316 316
317 if (IS_ERR(bdev)) { 317 if (IS_ERR(bdev)) {
318 DPRINTK("xen_vbd_create: device %08x could not be opened.\n", 318 DPRINTK("xen_vbd_create: device %08x could not be opened.\n",
319 vbd->pdevice); 319 vbd->pdevice);
320 return -ENOENT; 320 return -ENOENT;
321 } 321 }
322 322
323 vbd->bdev = bdev; 323 vbd->bdev = bdev;
324 if (vbd->bdev->bd_disk == NULL) { 324 if (vbd->bdev->bd_disk == NULL) {
325 DPRINTK("xen_vbd_create: device %08x doesn't exist.\n", 325 DPRINTK("xen_vbd_create: device %08x doesn't exist.\n",
326 vbd->pdevice); 326 vbd->pdevice);
327 xen_vbd_free(vbd); 327 xen_vbd_free(vbd);
328 return -ENOENT; 328 return -ENOENT;
329 } 329 }
330 vbd->size = vbd_sz(vbd); 330 vbd->size = vbd_sz(vbd);
331 331
332 if (vbd->bdev->bd_disk->flags & GENHD_FL_CD || cdrom) 332 if (vbd->bdev->bd_disk->flags & GENHD_FL_CD || cdrom)
333 vbd->type |= VDISK_CDROM; 333 vbd->type |= VDISK_CDROM;
334 if (vbd->bdev->bd_disk->flags & GENHD_FL_REMOVABLE) 334 if (vbd->bdev->bd_disk->flags & GENHD_FL_REMOVABLE)
335 vbd->type |= VDISK_REMOVABLE; 335 vbd->type |= VDISK_REMOVABLE;
336 336
337 q = bdev_get_queue(bdev); 337 q = bdev_get_queue(bdev);
338 if (q && q->flush_flags) 338 if (q && q->flush_flags)
339 vbd->flush_support = true; 339 vbd->flush_support = true;
340 340
341 if (q && blk_queue_secdiscard(q))
342 vbd->discard_secure = true;
343
341 DPRINTK("Successful creation of handle=%04x (dom=%u)\n", 344 DPRINTK("Successful creation of handle=%04x (dom=%u)\n",
342 handle, blkif->domid); 345 handle, blkif->domid);
343 return 0; 346 return 0;
344 } 347 }
345 static int xen_blkbk_remove(struct xenbus_device *dev) 348 static int xen_blkbk_remove(struct xenbus_device *dev)
346 { 349 {
347 struct backend_info *be = dev_get_drvdata(&dev->dev); 350 struct backend_info *be = dev_get_drvdata(&dev->dev);
348 351
349 DPRINTK(""); 352 DPRINTK("");
350 353
351 if (be->major || be->minor) 354 if (be->major || be->minor)
352 xenvbd_sysfs_delif(dev); 355 xenvbd_sysfs_delif(dev);
353 356
354 if (be->backend_watch.node) { 357 if (be->backend_watch.node) {
355 unregister_xenbus_watch(&be->backend_watch); 358 unregister_xenbus_watch(&be->backend_watch);
356 kfree(be->backend_watch.node); 359 kfree(be->backend_watch.node);
357 be->backend_watch.node = NULL; 360 be->backend_watch.node = NULL;
358 } 361 }
359 362
360 if (be->blkif) { 363 if (be->blkif) {
361 xen_blkif_disconnect(be->blkif); 364 xen_blkif_disconnect(be->blkif);
362 xen_vbd_free(&be->blkif->vbd); 365 xen_vbd_free(&be->blkif->vbd);
363 xen_blkif_free(be->blkif); 366 xen_blkif_free(be->blkif);
364 be->blkif = NULL; 367 be->blkif = NULL;
365 } 368 }
366 369
367 kfree(be); 370 kfree(be);
368 dev_set_drvdata(&dev->dev, NULL); 371 dev_set_drvdata(&dev->dev, NULL);
369 return 0; 372 return 0;
370 } 373 }
371 374
372 int xen_blkbk_flush_diskcache(struct xenbus_transaction xbt, 375 int xen_blkbk_flush_diskcache(struct xenbus_transaction xbt,
373 struct backend_info *be, int state) 376 struct backend_info *be, int state)
374 { 377 {
375 struct xenbus_device *dev = be->dev; 378 struct xenbus_device *dev = be->dev;
376 int err; 379 int err;
377 380
378 err = xenbus_printf(xbt, dev->nodename, "feature-flush-cache", 381 err = xenbus_printf(xbt, dev->nodename, "feature-flush-cache",
379 "%d", state); 382 "%d", state);
380 if (err) 383 if (err)
381 xenbus_dev_fatal(dev, err, "writing feature-flush-cache"); 384 xenbus_dev_fatal(dev, err, "writing feature-flush-cache");
382 385
383 return err; 386 return err;
384 } 387 }
385 388
386 int xen_blkbk_discard(struct xenbus_transaction xbt, struct backend_info *be) 389 int xen_blkbk_discard(struct xenbus_transaction xbt, struct backend_info *be)
387 { 390 {
388 struct xenbus_device *dev = be->dev; 391 struct xenbus_device *dev = be->dev;
389 struct xen_blkif *blkif = be->blkif; 392 struct xen_blkif *blkif = be->blkif;
390 char *type; 393 char *type;
391 int err; 394 int err;
392 int state = 0; 395 int state = 0;
393 396
394 type = xenbus_read(XBT_NIL, dev->nodename, "type", NULL); 397 type = xenbus_read(XBT_NIL, dev->nodename, "type", NULL);
395 if (!IS_ERR(type)) { 398 if (!IS_ERR(type)) {
396 if (strncmp(type, "file", 4) == 0) { 399 if (strncmp(type, "file", 4) == 0) {
397 state = 1; 400 state = 1;
398 blkif->blk_backend_type = BLKIF_BACKEND_FILE; 401 blkif->blk_backend_type = BLKIF_BACKEND_FILE;
399 } 402 }
400 if (strncmp(type, "phy", 3) == 0) { 403 if (strncmp(type, "phy", 3) == 0) {
401 struct block_device *bdev = be->blkif->vbd.bdev; 404 struct block_device *bdev = be->blkif->vbd.bdev;
402 struct request_queue *q = bdev_get_queue(bdev); 405 struct request_queue *q = bdev_get_queue(bdev);
403 if (blk_queue_discard(q)) { 406 if (blk_queue_discard(q)) {
404 err = xenbus_printf(xbt, dev->nodename, 407 err = xenbus_printf(xbt, dev->nodename,
405 "discard-granularity", "%u", 408 "discard-granularity", "%u",
406 q->limits.discard_granularity); 409 q->limits.discard_granularity);
407 if (err) { 410 if (err) {
408 xenbus_dev_fatal(dev, err, 411 xenbus_dev_fatal(dev, err,
409 "writing discard-granularity"); 412 "writing discard-granularity");
410 goto kfree; 413 goto kfree;
411 } 414 }
412 err = xenbus_printf(xbt, dev->nodename, 415 err = xenbus_printf(xbt, dev->nodename,
413 "discard-alignment", "%u", 416 "discard-alignment", "%u",
414 q->limits.discard_alignment); 417 q->limits.discard_alignment);
415 if (err) { 418 if (err) {
416 xenbus_dev_fatal(dev, err, 419 xenbus_dev_fatal(dev, err,
417 "writing discard-alignment"); 420 "writing discard-alignment");
418 goto kfree; 421 goto kfree;
419 } 422 }
420 state = 1; 423 state = 1;
421 blkif->blk_backend_type = BLKIF_BACKEND_PHY; 424 blkif->blk_backend_type = BLKIF_BACKEND_PHY;
425 }
426 /* Optional. */
427 err = xenbus_printf(xbt, dev->nodename,
428 "discard-secure", "%d",
429 blkif->vbd.discard_secure);
430 if (err) {
431 xenbus_dev_fatal(dev, err,
432 "writting discard-secure");
433 goto kfree;
422 } 434 }
423 } 435 }
424 } else { 436 } else {
425 err = PTR_ERR(type); 437 err = PTR_ERR(type);
426 xenbus_dev_fatal(dev, err, "reading type"); 438 xenbus_dev_fatal(dev, err, "reading type");
427 goto out; 439 goto out;
428 } 440 }
429 441
430 err = xenbus_printf(xbt, dev->nodename, "feature-discard", 442 err = xenbus_printf(xbt, dev->nodename, "feature-discard",
431 "%d", state); 443 "%d", state);
432 if (err) 444 if (err)
433 xenbus_dev_fatal(dev, err, "writing feature-discard"); 445 xenbus_dev_fatal(dev, err, "writing feature-discard");
434 kfree: 446 kfree:
435 kfree(type); 447 kfree(type);
436 out: 448 out:
437 return err; 449 return err;
438 } 450 }
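/*
 * Illustrative only (the values depend on the underlying queue limits):
 * for a discard-capable "phy" backend, the function above ends up
 * publishing roughly these nodes under the backend's xenstore directory:
 *
 *	discard-granularity = "512"
 *	discard-alignment   = "0"
 *	discard-secure      = "1"   (0 unless the queue supports secure erase)
 *	feature-discard     = "1"
 *
 * A "file" (loop-device backed) backend only advertises
 * feature-discard = "1".
 */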
439 int xen_blkbk_barrier(struct xenbus_transaction xbt, 451 int xen_blkbk_barrier(struct xenbus_transaction xbt,
440 struct backend_info *be, int state) 452 struct backend_info *be, int state)
441 { 453 {
442 struct xenbus_device *dev = be->dev; 454 struct xenbus_device *dev = be->dev;
443 int err; 455 int err;
444 456
445 err = xenbus_printf(xbt, dev->nodename, "feature-barrier", 457 err = xenbus_printf(xbt, dev->nodename, "feature-barrier",
446 "%d", state); 458 "%d", state);
447 if (err) 459 if (err)
448 xenbus_dev_fatal(dev, err, "writing feature-barrier"); 460 xenbus_dev_fatal(dev, err, "writing feature-barrier");
449 461
450 return err; 462 return err;
451 } 463 }
452 464
453 /* 465 /*
454 * Entry point to this code when a new device is created. Allocate the basic 466 * Entry point to this code when a new device is created. Allocate the basic
455 * structures, and watch the store waiting for the hotplug scripts to tell us 467 * structures, and watch the store waiting for the hotplug scripts to tell us
456 * the device's physical major and minor numbers. Switch to InitWait. 468 * the device's physical major and minor numbers. Switch to InitWait.
457 */ 469 */
458 static int xen_blkbk_probe(struct xenbus_device *dev, 470 static int xen_blkbk_probe(struct xenbus_device *dev,
459 const struct xenbus_device_id *id) 471 const struct xenbus_device_id *id)
460 { 472 {
461 int err; 473 int err;
462 struct backend_info *be = kzalloc(sizeof(struct backend_info), 474 struct backend_info *be = kzalloc(sizeof(struct backend_info),
463 GFP_KERNEL); 475 GFP_KERNEL);
464 if (!be) { 476 if (!be) {
465 xenbus_dev_fatal(dev, -ENOMEM, 477 xenbus_dev_fatal(dev, -ENOMEM,
466 "allocating backend structure"); 478 "allocating backend structure");
467 return -ENOMEM; 479 return -ENOMEM;
468 } 480 }
469 be->dev = dev; 481 be->dev = dev;
470 dev_set_drvdata(&dev->dev, be); 482 dev_set_drvdata(&dev->dev, be);
471 483
472 be->blkif = xen_blkif_alloc(dev->otherend_id); 484 be->blkif = xen_blkif_alloc(dev->otherend_id);
473 if (IS_ERR(be->blkif)) { 485 if (IS_ERR(be->blkif)) {
474 err = PTR_ERR(be->blkif); 486 err = PTR_ERR(be->blkif);
475 be->blkif = NULL; 487 be->blkif = NULL;
476 xenbus_dev_fatal(dev, err, "creating block interface"); 488 xenbus_dev_fatal(dev, err, "creating block interface");
477 goto fail; 489 goto fail;
478 } 490 }
479 491
480 /* setup back pointer */ 492 /* setup back pointer */
481 be->blkif->be = be; 493 be->blkif->be = be;
482 494
483 err = xenbus_watch_pathfmt(dev, &be->backend_watch, backend_changed, 495 err = xenbus_watch_pathfmt(dev, &be->backend_watch, backend_changed,
484 "%s/%s", dev->nodename, "physical-device"); 496 "%s/%s", dev->nodename, "physical-device");
485 if (err) 497 if (err)
486 goto fail; 498 goto fail;
487 499
488 err = xenbus_switch_state(dev, XenbusStateInitWait); 500 err = xenbus_switch_state(dev, XenbusStateInitWait);
489 if (err) 501 if (err)
490 goto fail; 502 goto fail;
491 503
492 return 0; 504 return 0;
493 505
494 fail: 506 fail:
495 DPRINTK("failed"); 507 DPRINTK("failed");
496 xen_blkbk_remove(dev); 508 xen_blkbk_remove(dev);
497 return err; 509 return err;
498 } 510 }
499 511
500 512
501 /* 513 /*
502 * Callback received when the hotplug scripts have placed the physical-device 514 * Callback received when the hotplug scripts have placed the physical-device
503 * node. Read it and the mode node, and create a vbd. If the frontend is 515 * node. Read it and the mode node, and create a vbd. If the frontend is
504 * ready, connect. 516 * ready, connect.
505 */ 517 */
506 static void backend_changed(struct xenbus_watch *watch, 518 static void backend_changed(struct xenbus_watch *watch,
507 const char **vec, unsigned int len) 519 const char **vec, unsigned int len)
508 { 520 {
509 int err; 521 int err;
510 unsigned major; 522 unsigned major;
511 unsigned minor; 523 unsigned minor;
512 struct backend_info *be 524 struct backend_info *be
513 = container_of(watch, struct backend_info, backend_watch); 525 = container_of(watch, struct backend_info, backend_watch);
514 struct xenbus_device *dev = be->dev; 526 struct xenbus_device *dev = be->dev;
515 int cdrom = 0; 527 int cdrom = 0;
516 char *device_type; 528 char *device_type;
517 529
518 DPRINTK(""); 530 DPRINTK("");
519 531
520 err = xenbus_scanf(XBT_NIL, dev->nodename, "physical-device", "%x:%x", 532 err = xenbus_scanf(XBT_NIL, dev->nodename, "physical-device", "%x:%x",
521 &major, &minor); 533 &major, &minor);
522 if (XENBUS_EXIST_ERR(err)) { 534 if (XENBUS_EXIST_ERR(err)) {
523 /* 535 /*
524 * Since this watch will fire once immediately after it is 536 * Since this watch will fire once immediately after it is
525 * registered, we expect this. Ignore it, and wait for the 537 * registered, we expect this. Ignore it, and wait for the
526 * hotplug scripts. 538 * hotplug scripts.
527 */ 539 */
528 return; 540 return;
529 } 541 }
530 if (err != 2) { 542 if (err != 2) {
531 xenbus_dev_fatal(dev, err, "reading physical-device"); 543 xenbus_dev_fatal(dev, err, "reading physical-device");
532 return; 544 return;
533 } 545 }
534 546
535 if ((be->major || be->minor) && 547 if ((be->major || be->minor) &&
536 ((be->major != major) || (be->minor != minor))) { 548 ((be->major != major) || (be->minor != minor))) {
537 pr_warn(DRV_PFX "changing physical device (from %x:%x to %x:%x) not supported.\n", 549 pr_warn(DRV_PFX "changing physical device (from %x:%x to %x:%x) not supported.\n",
538 be->major, be->minor, major, minor); 550 be->major, be->minor, major, minor);
539 return; 551 return;
540 } 552 }
541 553
542 be->mode = xenbus_read(XBT_NIL, dev->nodename, "mode", NULL); 554 be->mode = xenbus_read(XBT_NIL, dev->nodename, "mode", NULL);
543 if (IS_ERR(be->mode)) { 555 if (IS_ERR(be->mode)) {
544 err = PTR_ERR(be->mode); 556 err = PTR_ERR(be->mode);
545 be->mode = NULL; 557 be->mode = NULL;
546 xenbus_dev_fatal(dev, err, "reading mode"); 558 xenbus_dev_fatal(dev, err, "reading mode");
547 return; 559 return;
548 } 560 }
549 561
550 device_type = xenbus_read(XBT_NIL, dev->otherend, "device-type", NULL); 562 device_type = xenbus_read(XBT_NIL, dev->otherend, "device-type", NULL);
551 if (!IS_ERR(device_type)) { 563 if (!IS_ERR(device_type)) {
552 cdrom = strcmp(device_type, "cdrom") == 0; 564 cdrom = strcmp(device_type, "cdrom") == 0;
553 kfree(device_type); 565 kfree(device_type);
554 } 566 }
555 567
556 if (be->major == 0 && be->minor == 0) { 568 if (be->major == 0 && be->minor == 0) {
557 /* Front end dir is a number, which is used as the handle. */ 569 /* Front end dir is a number, which is used as the handle. */
558 570
559 char *p = strrchr(dev->otherend, '/') + 1; 571 char *p = strrchr(dev->otherend, '/') + 1;
560 long handle; 572 long handle;
561 err = strict_strtoul(p, 0, &handle); 573 err = strict_strtoul(p, 0, &handle);
562 if (err) 574 if (err)
563 return; 575 return;
564 576
565 be->major = major; 577 be->major = major;
566 be->minor = minor; 578 be->minor = minor;
567 579
568 err = xen_vbd_create(be->blkif, handle, major, minor, 580 err = xen_vbd_create(be->blkif, handle, major, minor,
569 (NULL == strchr(be->mode, 'w')), cdrom); 581 (NULL == strchr(be->mode, 'w')), cdrom);
570 if (err) { 582 if (err) {
571 be->major = 0; 583 be->major = 0;
572 be->minor = 0; 584 be->minor = 0;
573 xenbus_dev_fatal(dev, err, "creating vbd structure"); 585 xenbus_dev_fatal(dev, err, "creating vbd structure");
574 return; 586 return;
575 } 587 }
576 588
577 err = xenvbd_sysfs_addif(dev); 589 err = xenvbd_sysfs_addif(dev);
578 if (err) { 590 if (err) {
579 xen_vbd_free(&be->blkif->vbd); 591 xen_vbd_free(&be->blkif->vbd);
580 be->major = 0; 592 be->major = 0;
581 be->minor = 0; 593 be->minor = 0;
582 xenbus_dev_fatal(dev, err, "creating sysfs entries"); 594 xenbus_dev_fatal(dev, err, "creating sysfs entries");
583 return; 595 return;
584 } 596 }
585 597
586 /* We're potentially connected now */ 598 /* We're potentially connected now */
587 xen_update_blkif_status(be->blkif); 599 xen_update_blkif_status(be->blkif);
588 } 600 }
589 } 601 }
590 602
591 603
592 /* 604 /*
593 * Callback received when the frontend's state changes. 605 * Callback received when the frontend's state changes.
594 */ 606 */
595 static void frontend_changed(struct xenbus_device *dev, 607 static void frontend_changed(struct xenbus_device *dev,
596 enum xenbus_state frontend_state) 608 enum xenbus_state frontend_state)
597 { 609 {
598 struct backend_info *be = dev_get_drvdata(&dev->dev); 610 struct backend_info *be = dev_get_drvdata(&dev->dev);
599 int err; 611 int err;
600 612
601 DPRINTK("%s", xenbus_strstate(frontend_state)); 613 DPRINTK("%s", xenbus_strstate(frontend_state));
602 614
603 switch (frontend_state) { 615 switch (frontend_state) {
604 case XenbusStateInitialising: 616 case XenbusStateInitialising:
605 if (dev->state == XenbusStateClosed) { 617 if (dev->state == XenbusStateClosed) {
606 pr_info(DRV_PFX "%s: prepare for reconnect\n", 618 pr_info(DRV_PFX "%s: prepare for reconnect\n",
607 dev->nodename); 619 dev->nodename);
608 xenbus_switch_state(dev, XenbusStateInitWait); 620 xenbus_switch_state(dev, XenbusStateInitWait);
609 } 621 }
610 break; 622 break;
611 623
612 case XenbusStateInitialised: 624 case XenbusStateInitialised:
613 case XenbusStateConnected: 625 case XenbusStateConnected:
614 /* 626 /*
615 * Ensure we connect even when two watches fire in 627 * Ensure we connect even when two watches fire in
616 * close succession and we miss the intermediate value 628 * close succession and we miss the intermediate value
617 * of frontend_state. 629 * of frontend_state.
618 */ 630 */
619 if (dev->state == XenbusStateConnected) 631 if (dev->state == XenbusStateConnected)
620 break; 632 break;
621 633
622 /* 634 /*
623 * Enforce precondition before potential leak point. 635 * Enforce precondition before potential leak point.
624 * xen_blkif_disconnect() is idempotent. 636 * xen_blkif_disconnect() is idempotent.
625 */ 637 */
626 xen_blkif_disconnect(be->blkif); 638 xen_blkif_disconnect(be->blkif);
627 639
628 err = connect_ring(be); 640 err = connect_ring(be);
629 if (err) 641 if (err)
630 break; 642 break;
631 xen_update_blkif_status(be->blkif); 643 xen_update_blkif_status(be->blkif);
632 break; 644 break;
633 645
634 case XenbusStateClosing: 646 case XenbusStateClosing:
635 xenbus_switch_state(dev, XenbusStateClosing); 647 xenbus_switch_state(dev, XenbusStateClosing);
636 break; 648 break;
637 649
638 case XenbusStateClosed: 650 case XenbusStateClosed:
639 xen_blkif_disconnect(be->blkif); 651 xen_blkif_disconnect(be->blkif);
640 xenbus_switch_state(dev, XenbusStateClosed); 652 xenbus_switch_state(dev, XenbusStateClosed);
641 if (xenbus_dev_is_online(dev)) 653 if (xenbus_dev_is_online(dev))
642 break; 654 break;
643 /* fall through if not online */ 655 /* fall through if not online */
644 case XenbusStateUnknown: 656 case XenbusStateUnknown:
645 /* implies xen_blkif_disconnect() via xen_blkbk_remove() */ 657 /* implies xen_blkif_disconnect() via xen_blkbk_remove() */
646 device_unregister(&dev->dev); 658 device_unregister(&dev->dev);
647 break; 659 break;
648 660
649 default: 661 default:
650 xenbus_dev_fatal(dev, -EINVAL, "saw state %d at frontend", 662 xenbus_dev_fatal(dev, -EINVAL, "saw state %d at frontend",
651 frontend_state); 663 frontend_state);
652 break; 664 break;
653 } 665 }
654 } 666 }
655 667
656 668
657 /* ** Connection ** */ 669 /* ** Connection ** */
658 670
659 671
660 /* 672 /*
661 * Write the physical details regarding the block device to the store, and 673 * Write the physical details regarding the block device to the store, and
662 * switch to Connected state. 674 * switch to Connected state.
663 */ 675 */
664 static void connect(struct backend_info *be) 676 static void connect(struct backend_info *be)
665 { 677 {
666 struct xenbus_transaction xbt; 678 struct xenbus_transaction xbt;
667 int err; 679 int err;
668 struct xenbus_device *dev = be->dev; 680 struct xenbus_device *dev = be->dev;
669 681
670 DPRINTK("%s", dev->otherend); 682 DPRINTK("%s", dev->otherend);
671 683
672 /* Supply the information about the device the frontend needs */ 684 /* Supply the information about the device the frontend needs */
673 again: 685 again:
674 err = xenbus_transaction_start(&xbt); 686 err = xenbus_transaction_start(&xbt);
675 if (err) { 687 if (err) {
676 xenbus_dev_fatal(dev, err, "starting transaction"); 688 xenbus_dev_fatal(dev, err, "starting transaction");
677 return; 689 return;
678 } 690 }
679 691
680 err = xen_blkbk_flush_diskcache(xbt, be, be->blkif->vbd.flush_support); 692 err = xen_blkbk_flush_diskcache(xbt, be, be->blkif->vbd.flush_support);
681 if (err) 693 if (err)
682 goto abort; 694 goto abort;
683 695
684 err = xen_blkbk_discard(xbt, be); 696 err = xen_blkbk_discard(xbt, be);
685 697
686 	/* If we can't advertise it, that is OK. */ 698 	/* If we can't advertise it, that is OK. */
687 err = xen_blkbk_barrier(xbt, be, be->blkif->vbd.flush_support); 699 err = xen_blkbk_barrier(xbt, be, be->blkif->vbd.flush_support);
688 700
689 err = xenbus_printf(xbt, dev->nodename, "sectors", "%llu", 701 err = xenbus_printf(xbt, dev->nodename, "sectors", "%llu",
690 (unsigned long long)vbd_sz(&be->blkif->vbd)); 702 (unsigned long long)vbd_sz(&be->blkif->vbd));
691 if (err) { 703 if (err) {
692 xenbus_dev_fatal(dev, err, "writing %s/sectors", 704 xenbus_dev_fatal(dev, err, "writing %s/sectors",
693 dev->nodename); 705 dev->nodename);
694 goto abort; 706 goto abort;
695 } 707 }
696 708
697 /* FIXME: use a typename instead */ 709 /* FIXME: use a typename instead */
698 err = xenbus_printf(xbt, dev->nodename, "info", "%u", 710 err = xenbus_printf(xbt, dev->nodename, "info", "%u",
699 be->blkif->vbd.type | 711 be->blkif->vbd.type |
700 (be->blkif->vbd.readonly ? VDISK_READONLY : 0)); 712 (be->blkif->vbd.readonly ? VDISK_READONLY : 0));
701 if (err) { 713 if (err) {
702 xenbus_dev_fatal(dev, err, "writing %s/info", 714 xenbus_dev_fatal(dev, err, "writing %s/info",
703 dev->nodename); 715 dev->nodename);
704 goto abort; 716 goto abort;
705 } 717 }
706 err = xenbus_printf(xbt, dev->nodename, "sector-size", "%lu", 718 err = xenbus_printf(xbt, dev->nodename, "sector-size", "%lu",
707 (unsigned long) 719 (unsigned long)
708 bdev_logical_block_size(be->blkif->vbd.bdev)); 720 bdev_logical_block_size(be->blkif->vbd.bdev));
709 if (err) { 721 if (err) {
710 xenbus_dev_fatal(dev, err, "writing %s/sector-size", 722 xenbus_dev_fatal(dev, err, "writing %s/sector-size",
711 dev->nodename); 723 dev->nodename);
712 goto abort; 724 goto abort;
713 } 725 }
714 726
715 err = xenbus_transaction_end(xbt, 0); 727 err = xenbus_transaction_end(xbt, 0);
716 if (err == -EAGAIN) 728 if (err == -EAGAIN)
717 goto again; 729 goto again;
718 if (err) 730 if (err)
719 xenbus_dev_fatal(dev, err, "ending transaction"); 731 xenbus_dev_fatal(dev, err, "ending transaction");
720 732
721 err = xenbus_switch_state(dev, XenbusStateConnected); 733 err = xenbus_switch_state(dev, XenbusStateConnected);
722 if (err) 734 if (err)
723 xenbus_dev_fatal(dev, err, "%s: switching to Connected state", 735 xenbus_dev_fatal(dev, err, "%s: switching to Connected state",
724 dev->nodename); 736 dev->nodename);
725 737
726 return; 738 return;
727 abort: 739 abort:
728 xenbus_transaction_end(xbt, 1); 740 xenbus_transaction_end(xbt, 1);
729 } 741 }
730 742
731 743
732 static int connect_ring(struct backend_info *be) 744 static int connect_ring(struct backend_info *be)
733 { 745 {
734 struct xenbus_device *dev = be->dev; 746 struct xenbus_device *dev = be->dev;
735 unsigned long ring_ref; 747 unsigned long ring_ref;
736 unsigned int evtchn; 748 unsigned int evtchn;
737 char protocol[64] = ""; 749 char protocol[64] = "";
738 int err; 750 int err;
739 751
740 DPRINTK("%s", dev->otherend); 752 DPRINTK("%s", dev->otherend);
741 753
742 err = xenbus_gather(XBT_NIL, dev->otherend, "ring-ref", "%lu", 754 err = xenbus_gather(XBT_NIL, dev->otherend, "ring-ref", "%lu",
743 &ring_ref, "event-channel", "%u", &evtchn, NULL); 755 &ring_ref, "event-channel", "%u", &evtchn, NULL);
744 if (err) { 756 if (err) {
745 xenbus_dev_fatal(dev, err, 757 xenbus_dev_fatal(dev, err,
746 "reading %s/ring-ref and event-channel", 758 "reading %s/ring-ref and event-channel",
747 dev->otherend); 759 dev->otherend);
748 return err; 760 return err;
749 } 761 }
750 762
751 be->blkif->blk_protocol = BLKIF_PROTOCOL_NATIVE; 763 be->blkif->blk_protocol = BLKIF_PROTOCOL_NATIVE;
752 err = xenbus_gather(XBT_NIL, dev->otherend, "protocol", 764 err = xenbus_gather(XBT_NIL, dev->otherend, "protocol",
753 "%63s", protocol, NULL); 765 "%63s", protocol, NULL);
754 if (err) 766 if (err)
755 strcpy(protocol, "unspecified, assuming native"); 767 strcpy(protocol, "unspecified, assuming native");
756 else if (0 == strcmp(protocol, XEN_IO_PROTO_ABI_NATIVE)) 768 else if (0 == strcmp(protocol, XEN_IO_PROTO_ABI_NATIVE))
757 be->blkif->blk_protocol = BLKIF_PROTOCOL_NATIVE; 769 be->blkif->blk_protocol = BLKIF_PROTOCOL_NATIVE;
758 else if (0 == strcmp(protocol, XEN_IO_PROTO_ABI_X86_32)) 770 else if (0 == strcmp(protocol, XEN_IO_PROTO_ABI_X86_32))
759 be->blkif->blk_protocol = BLKIF_PROTOCOL_X86_32; 771 be->blkif->blk_protocol = BLKIF_PROTOCOL_X86_32;
760 else if (0 == strcmp(protocol, XEN_IO_PROTO_ABI_X86_64)) 772 else if (0 == strcmp(protocol, XEN_IO_PROTO_ABI_X86_64))
761 be->blkif->blk_protocol = BLKIF_PROTOCOL_X86_64; 773 be->blkif->blk_protocol = BLKIF_PROTOCOL_X86_64;
762 else { 774 else {
763 xenbus_dev_fatal(dev, err, "unknown fe protocol %s", protocol); 775 xenbus_dev_fatal(dev, err, "unknown fe protocol %s", protocol);
764 return -1; 776 return -1;
765 } 777 }
766 pr_info(DRV_PFX "ring-ref %ld, event-channel %d, protocol %d (%s)\n", 778 pr_info(DRV_PFX "ring-ref %ld, event-channel %d, protocol %d (%s)\n",
767 ring_ref, evtchn, be->blkif->blk_protocol, protocol); 779 ring_ref, evtchn, be->blkif->blk_protocol, protocol);
768 780
769 /* Map the shared frame, irq etc. */ 781 /* Map the shared frame, irq etc. */
770 err = xen_blkif_map(be->blkif, ring_ref, evtchn); 782 err = xen_blkif_map(be->blkif, ring_ref, evtchn);
771 if (err) { 783 if (err) {
772 xenbus_dev_fatal(dev, err, "mapping ring-ref %lu port %u", 784 xenbus_dev_fatal(dev, err, "mapping ring-ref %lu port %u",
773 ring_ref, evtchn); 785 ring_ref, evtchn);
774 return err; 786 return err;
775 } 787 }
776 788
777 return 0; 789 return 0;
778 } 790 }
779 791
780 792
781 /* ** Driver Registration ** */ 793 /* ** Driver Registration ** */
782 794
783 795
784 static const struct xenbus_device_id xen_blkbk_ids[] = { 796 static const struct xenbus_device_id xen_blkbk_ids[] = {
785 { "vbd" }, 797 { "vbd" },
786 { "" } 798 { "" }
787 }; 799 };
788 800
789 801
790 static DEFINE_XENBUS_DRIVER(xen_blkbk, , 802 static DEFINE_XENBUS_DRIVER(xen_blkbk, ,
791 .probe = xen_blkbk_probe, 803 .probe = xen_blkbk_probe,
792 .remove = xen_blkbk_remove, 804 .remove = xen_blkbk_remove,
793 .otherend_changed = frontend_changed 805 .otherend_changed = frontend_changed
794 ); 806 );
795 807
796 808
797 int xen_blkif_xenbus_init(void) 809 int xen_blkif_xenbus_init(void)
798 { 810 {
799 return xenbus_register_backend(&xen_blkbk_driver); 811 return xenbus_register_backend(&xen_blkbk_driver);
800 } 812 }
801 813
drivers/block/xen-blkfront.c
1 /* 1 /*
2 * blkfront.c 2 * blkfront.c
3 * 3 *
4 * XenLinux virtual block device driver. 4 * XenLinux virtual block device driver.
5 * 5 *
6 * Copyright (c) 2003-2004, Keir Fraser & Steve Hand 6 * Copyright (c) 2003-2004, Keir Fraser & Steve Hand
7 * Modifications by Mark A. Williamson are (c) Intel Research Cambridge 7 * Modifications by Mark A. Williamson are (c) Intel Research Cambridge
8 * Copyright (c) 2004, Christian Limpach 8 * Copyright (c) 2004, Christian Limpach
9 * Copyright (c) 2004, Andrew Warfield 9 * Copyright (c) 2004, Andrew Warfield
10 * Copyright (c) 2005, Christopher Clark 10 * Copyright (c) 2005, Christopher Clark
11 * Copyright (c) 2005, XenSource Ltd 11 * Copyright (c) 2005, XenSource Ltd
12 * 12 *
13 * This program is free software; you can redistribute it and/or 13 * This program is free software; you can redistribute it and/or
14 * modify it under the terms of the GNU General Public License version 2 14 * modify it under the terms of the GNU General Public License version 2
15 * as published by the Free Software Foundation; or, when distributed 15 * as published by the Free Software Foundation; or, when distributed
16 * separately from the Linux kernel or incorporated into other 16 * separately from the Linux kernel or incorporated into other
17 * software packages, subject to the following license: 17 * software packages, subject to the following license:
18 * 18 *
19 * Permission is hereby granted, free of charge, to any person obtaining a copy 19 * Permission is hereby granted, free of charge, to any person obtaining a copy
20 * of this source file (the "Software"), to deal in the Software without 20 * of this source file (the "Software"), to deal in the Software without
21 * restriction, including without limitation the rights to use, copy, modify, 21 * restriction, including without limitation the rights to use, copy, modify,
22 * merge, publish, distribute, sublicense, and/or sell copies of the Software, 22 * merge, publish, distribute, sublicense, and/or sell copies of the Software,
23 * and to permit persons to whom the Software is furnished to do so, subject to 23 * and to permit persons to whom the Software is furnished to do so, subject to
24 * the following conditions: 24 * the following conditions:
25 * 25 *
26 * The above copyright notice and this permission notice shall be included in 26 * The above copyright notice and this permission notice shall be included in
27 * all copies or substantial portions of the Software. 27 * all copies or substantial portions of the Software.
28 * 28 *
29 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 29 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
30 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 30 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
31 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 31 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
32 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 32 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
33 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 33 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
34 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 34 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
35 * IN THE SOFTWARE. 35 * IN THE SOFTWARE.
36 */ 36 */
37 37
38 #include <linux/interrupt.h> 38 #include <linux/interrupt.h>
39 #include <linux/blkdev.h> 39 #include <linux/blkdev.h>
40 #include <linux/hdreg.h> 40 #include <linux/hdreg.h>
41 #include <linux/cdrom.h> 41 #include <linux/cdrom.h>
42 #include <linux/module.h> 42 #include <linux/module.h>
43 #include <linux/slab.h> 43 #include <linux/slab.h>
44 #include <linux/mutex.h> 44 #include <linux/mutex.h>
45 #include <linux/scatterlist.h> 45 #include <linux/scatterlist.h>
46 46
47 #include <xen/xen.h> 47 #include <xen/xen.h>
48 #include <xen/xenbus.h> 48 #include <xen/xenbus.h>
49 #include <xen/grant_table.h> 49 #include <xen/grant_table.h>
50 #include <xen/events.h> 50 #include <xen/events.h>
51 #include <xen/page.h> 51 #include <xen/page.h>
52 #include <xen/platform_pci.h> 52 #include <xen/platform_pci.h>
53 53
54 #include <xen/interface/grant_table.h> 54 #include <xen/interface/grant_table.h>
55 #include <xen/interface/io/blkif.h> 55 #include <xen/interface/io/blkif.h>
56 #include <xen/interface/io/protocols.h> 56 #include <xen/interface/io/protocols.h>
57 57
58 #include <asm/xen/hypervisor.h> 58 #include <asm/xen/hypervisor.h>
59 59
60 enum blkif_state { 60 enum blkif_state {
61 BLKIF_STATE_DISCONNECTED, 61 BLKIF_STATE_DISCONNECTED,
62 BLKIF_STATE_CONNECTED, 62 BLKIF_STATE_CONNECTED,
63 BLKIF_STATE_SUSPENDED, 63 BLKIF_STATE_SUSPENDED,
64 }; 64 };
65 65
66 struct blk_shadow { 66 struct blk_shadow {
67 struct blkif_request req; 67 struct blkif_request req;
68 struct request *request; 68 struct request *request;
69 unsigned long frame[BLKIF_MAX_SEGMENTS_PER_REQUEST]; 69 unsigned long frame[BLKIF_MAX_SEGMENTS_PER_REQUEST];
70 }; 70 };
71 71
72 static DEFINE_MUTEX(blkfront_mutex); 72 static DEFINE_MUTEX(blkfront_mutex);
73 static const struct block_device_operations xlvbd_block_fops; 73 static const struct block_device_operations xlvbd_block_fops;
74 74
75 #define BLK_RING_SIZE __CONST_RING_SIZE(blkif, PAGE_SIZE) 75 #define BLK_RING_SIZE __CONST_RING_SIZE(blkif, PAGE_SIZE)
76 76
77 /* 77 /*
78 * We have one of these per vbd, whether ide, scsi or 'other'. They 78 * We have one of these per vbd, whether ide, scsi or 'other'. They
79 * hang in private_data off the gendisk structure. We may end up 79 * hang in private_data off the gendisk structure. We may end up
80 * putting all kinds of interesting stuff here :-) 80 * putting all kinds of interesting stuff here :-)
81 */ 81 */
82 struct blkfront_info 82 struct blkfront_info
83 { 83 {
84 struct mutex mutex; 84 struct mutex mutex;
85 struct xenbus_device *xbdev; 85 struct xenbus_device *xbdev;
86 struct gendisk *gd; 86 struct gendisk *gd;
87 int vdevice; 87 int vdevice;
88 blkif_vdev_t handle; 88 blkif_vdev_t handle;
89 enum blkif_state connected; 89 enum blkif_state connected;
90 int ring_ref; 90 int ring_ref;
91 struct blkif_front_ring ring; 91 struct blkif_front_ring ring;
92 struct scatterlist sg[BLKIF_MAX_SEGMENTS_PER_REQUEST]; 92 struct scatterlist sg[BLKIF_MAX_SEGMENTS_PER_REQUEST];
93 unsigned int evtchn, irq; 93 unsigned int evtchn, irq;
94 struct request_queue *rq; 94 struct request_queue *rq;
95 struct work_struct work; 95 struct work_struct work;
96 struct gnttab_free_callback callback; 96 struct gnttab_free_callback callback;
97 struct blk_shadow shadow[BLK_RING_SIZE]; 97 struct blk_shadow shadow[BLK_RING_SIZE];
98 unsigned long shadow_free; 98 unsigned long shadow_free;
99 unsigned int feature_flush; 99 unsigned int feature_flush;
100 unsigned int flush_op; 100 unsigned int flush_op;
101 unsigned int feature_discard; 101 unsigned int feature_discard:1;
102 unsigned int feature_secdiscard:1;
102 unsigned int discard_granularity; 103 unsigned int discard_granularity;
103 unsigned int discard_alignment; 104 unsigned int discard_alignment;
104 int is_ready; 105 int is_ready;
105 }; 106 };
106 107
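/*
 * Hedged sketch (hypothetical helper, not the driver's actual probe
 * routine): how a frontend could populate the discard fields above from
 * the keys the backend publishes in xen_blkbk_discard().
 */
static void sketch_probe_discard(struct blkfront_info *info)
{
	unsigned int granularity, alignment, secure = 0;
	int err;

	err = xenbus_gather(XBT_NIL, info->xbdev->otherend,
			    "discard-granularity", "%u", &granularity,
			    "discard-alignment", "%u", &alignment,
			    NULL);
	if (err)
		return;		/* backend did not advertise discard */

	info->feature_discard = 1;
	info->discard_granularity = granularity;
	info->discard_alignment = alignment;

	/* Optional key: absence simply means no secure erase support. */
	err = xenbus_gather(XBT_NIL, info->xbdev->otherend,
			    "discard-secure", "%u", &secure, NULL);
	if (!err)
		info->feature_secdiscard = !!secure;
}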
107 static DEFINE_SPINLOCK(blkif_io_lock); 108 static DEFINE_SPINLOCK(blkif_io_lock);
108 109
109 static unsigned int nr_minors; 110 static unsigned int nr_minors;
110 static unsigned long *minors; 111 static unsigned long *minors;
111 static DEFINE_SPINLOCK(minor_lock); 112 static DEFINE_SPINLOCK(minor_lock);
112 113
113 #define MAXIMUM_OUTSTANDING_BLOCK_REQS \ 114 #define MAXIMUM_OUTSTANDING_BLOCK_REQS \
114 (BLKIF_MAX_SEGMENTS_PER_REQUEST * BLK_RING_SIZE) 115 (BLKIF_MAX_SEGMENTS_PER_REQUEST * BLK_RING_SIZE)
115 #define GRANT_INVALID_REF 0 116 #define GRANT_INVALID_REF 0
116 117
117 #define PARTS_PER_DISK 16 118 #define PARTS_PER_DISK 16
118 #define PARTS_PER_EXT_DISK 256 119 #define PARTS_PER_EXT_DISK 256
119 120
120 #define BLKIF_MAJOR(dev) ((dev)>>8) 121 #define BLKIF_MAJOR(dev) ((dev)>>8)
121 #define BLKIF_MINOR(dev) ((dev) & 0xff) 122 #define BLKIF_MINOR(dev) ((dev) & 0xff)
122 123
123 #define EXT_SHIFT 28 124 #define EXT_SHIFT 28
124 #define EXTENDED (1<<EXT_SHIFT) 125 #define EXTENDED (1<<EXT_SHIFT)
125 #define VDEV_IS_EXTENDED(dev) ((dev)&(EXTENDED)) 126 #define VDEV_IS_EXTENDED(dev) ((dev)&(EXTENDED))
126 #define BLKIF_MINOR_EXT(dev) ((dev)&(~EXTENDED)) 127 #define BLKIF_MINOR_EXT(dev) ((dev)&(~EXTENDED))
127 #define EMULATED_HD_DISK_MINOR_OFFSET (0) 128 #define EMULATED_HD_DISK_MINOR_OFFSET (0)
128 #define EMULATED_HD_DISK_NAME_OFFSET (EMULATED_HD_DISK_MINOR_OFFSET / 256) 129 #define EMULATED_HD_DISK_NAME_OFFSET (EMULATED_HD_DISK_MINOR_OFFSET / 256)
129 #define EMULATED_SD_DISK_MINOR_OFFSET (0) 130 #define EMULATED_SD_DISK_MINOR_OFFSET (0)
130 #define EMULATED_SD_DISK_NAME_OFFSET (EMULATED_SD_DISK_MINOR_OFFSET / 256) 131 #define EMULATED_SD_DISK_NAME_OFFSET (EMULATED_SD_DISK_MINOR_OFFSET / 256)
131 132
132 #define DEV_NAME "xvd" /* name in /dev */ 133 #define DEV_NAME "xvd" /* name in /dev */
133 134
134 static int get_id_from_freelist(struct blkfront_info *info) 135 static int get_id_from_freelist(struct blkfront_info *info)
135 { 136 {
136 unsigned long free = info->shadow_free; 137 unsigned long free = info->shadow_free;
137 BUG_ON(free >= BLK_RING_SIZE); 138 BUG_ON(free >= BLK_RING_SIZE);
138 info->shadow_free = info->shadow[free].req.id; 139 info->shadow_free = info->shadow[free].req.u.rw.id;
139 info->shadow[free].req.id = 0x0fffffee; /* debug */ 140 info->shadow[free].req.u.rw.id = 0x0fffffee; /* debug */
140 return free; 141 return free;
141 } 142 }
142 143
143 static void add_id_to_freelist(struct blkfront_info *info, 144 static void add_id_to_freelist(struct blkfront_info *info,
144 unsigned long id) 145 unsigned long id)
145 { 146 {
146 info->shadow[id].req.id = info->shadow_free; 147 info->shadow[id].req.u.rw.id = info->shadow_free;
147 info->shadow[id].request = NULL; 148 info->shadow[id].request = NULL;
148 info->shadow_free = id; 149 info->shadow_free = id;
149 } 150 }
150 151
151 static int xlbd_reserve_minors(unsigned int minor, unsigned int nr) 152 static int xlbd_reserve_minors(unsigned int minor, unsigned int nr)
152 { 153 {
153 unsigned int end = minor + nr; 154 unsigned int end = minor + nr;
154 int rc; 155 int rc;
155 156
156 if (end > nr_minors) { 157 if (end > nr_minors) {
157 unsigned long *bitmap, *old; 158 unsigned long *bitmap, *old;
158 159
159 bitmap = kzalloc(BITS_TO_LONGS(end) * sizeof(*bitmap), 160 bitmap = kcalloc(BITS_TO_LONGS(end), sizeof(*bitmap),
160 GFP_KERNEL); 161 GFP_KERNEL);
161 if (bitmap == NULL) 162 if (bitmap == NULL)
162 return -ENOMEM; 163 return -ENOMEM;
163 164
164 spin_lock(&minor_lock); 165 spin_lock(&minor_lock);
165 if (end > nr_minors) { 166 if (end > nr_minors) {
166 old = minors; 167 old = minors;
167 memcpy(bitmap, minors, 168 memcpy(bitmap, minors,
168 BITS_TO_LONGS(nr_minors) * sizeof(*bitmap)); 169 BITS_TO_LONGS(nr_minors) * sizeof(*bitmap));
169 minors = bitmap; 170 minors = bitmap;
170 nr_minors = BITS_TO_LONGS(end) * BITS_PER_LONG; 171 nr_minors = BITS_TO_LONGS(end) * BITS_PER_LONG;
171 } else 172 } else
172 old = bitmap; 173 old = bitmap;
173 spin_unlock(&minor_lock); 174 spin_unlock(&minor_lock);
174 kfree(old); 175 kfree(old);
175 } 176 }
176 177
177 spin_lock(&minor_lock); 178 spin_lock(&minor_lock);
178 if (find_next_bit(minors, end, minor) >= end) { 179 if (find_next_bit(minors, end, minor) >= end) {
179 for (; minor < end; ++minor) 180 for (; minor < end; ++minor)
180 __set_bit(minor, minors); 181 __set_bit(minor, minors);
181 rc = 0; 182 rc = 0;
182 } else 183 } else
183 rc = -EBUSY; 184 rc = -EBUSY;
184 spin_unlock(&minor_lock); 185 spin_unlock(&minor_lock);
185 186
186 return rc; 187 return rc;
187 } 188 }
188 189
189 static void xlbd_release_minors(unsigned int minor, unsigned int nr) 190 static void xlbd_release_minors(unsigned int minor, unsigned int nr)
190 { 191 {
191 unsigned int end = minor + nr; 192 unsigned int end = minor + nr;
192 193
193 BUG_ON(end > nr_minors); 194 BUG_ON(end > nr_minors);
194 spin_lock(&minor_lock); 195 spin_lock(&minor_lock);
195 for (; minor < end; ++minor) 196 for (; minor < end; ++minor)
196 __clear_bit(minor, minors); 197 __clear_bit(minor, minors);
197 spin_unlock(&minor_lock); 198 spin_unlock(&minor_lock);
198 } 199 }
199 200
200 static void blkif_restart_queue_callback(void *arg) 201 static void blkif_restart_queue_callback(void *arg)
201 { 202 {
202 struct blkfront_info *info = (struct blkfront_info *)arg; 203 struct blkfront_info *info = (struct blkfront_info *)arg;
203 schedule_work(&info->work); 204 schedule_work(&info->work);
204 } 205 }
205 206
206 static int blkif_getgeo(struct block_device *bd, struct hd_geometry *hg) 207 static int blkif_getgeo(struct block_device *bd, struct hd_geometry *hg)
207 { 208 {
208 /* We don't have real geometry info, but let's at least return 209 /* We don't have real geometry info, but let's at least return
209 values consistent with the size of the device */ 210 values consistent with the size of the device */
210 sector_t nsect = get_capacity(bd->bd_disk); 211 sector_t nsect = get_capacity(bd->bd_disk);
211 sector_t cylinders = nsect; 212 sector_t cylinders = nsect;
212 213
213 hg->heads = 0xff; 214 hg->heads = 0xff;
214 hg->sectors = 0x3f; 215 hg->sectors = 0x3f;
215 sector_div(cylinders, hg->heads * hg->sectors); 216 sector_div(cylinders, hg->heads * hg->sectors);
216 hg->cylinders = cylinders; 217 hg->cylinders = cylinders;
217 if ((sector_t)(hg->cylinders + 1) * hg->heads * hg->sectors < nsect) 218 if ((sector_t)(hg->cylinders + 1) * hg->heads * hg->sectors < nsect)
218 hg->cylinders = 0xffff; 219 hg->cylinders = 0xffff;
219 return 0; 220 return 0;
220 } 221 }
221 222
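For a concrete feel of the fake geometry above, here is the arithmetic for an assumed 16 GiB disk (capacity chosen only as an example):

/* nsect = 33554432 sectors (16 GiB of 512-byte sectors)
 * heads = 255, sectors = 63
 * cylinders = 33554432 / (255 * 63) = 33554432 / 16065 = 2088
 * check: (2088 + 1) * 16065 = 33559785 >= 33554432, so cylinders
 * stays at 2088 and is not clamped to 0xffff. */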
222 static int blkif_ioctl(struct block_device *bdev, fmode_t mode, 223 static int blkif_ioctl(struct block_device *bdev, fmode_t mode,
223 unsigned command, unsigned long argument) 224 unsigned command, unsigned long argument)
224 { 225 {
225 struct blkfront_info *info = bdev->bd_disk->private_data; 226 struct blkfront_info *info = bdev->bd_disk->private_data;
226 int i; 227 int i;
227 228
228 dev_dbg(&info->xbdev->dev, "command: 0x%x, argument: 0x%lx\n", 229 dev_dbg(&info->xbdev->dev, "command: 0x%x, argument: 0x%lx\n",
229 command, (long)argument); 230 command, (long)argument);
230 231
231 switch (command) { 232 switch (command) {
232 case CDROMMULTISESSION: 233 case CDROMMULTISESSION:
233 dev_dbg(&info->xbdev->dev, "FIXME: support multisession CDs later\n"); 234 dev_dbg(&info->xbdev->dev, "FIXME: support multisession CDs later\n");
234 for (i = 0; i < sizeof(struct cdrom_multisession); i++) 235 for (i = 0; i < sizeof(struct cdrom_multisession); i++)
235 if (put_user(0, (char __user *)(argument + i))) 236 if (put_user(0, (char __user *)(argument + i)))
236 return -EFAULT; 237 return -EFAULT;
237 return 0; 238 return 0;
238 239
239 case CDROM_GET_CAPABILITY: { 240 case CDROM_GET_CAPABILITY: {
240 struct gendisk *gd = info->gd; 241 struct gendisk *gd = info->gd;
241 if (gd->flags & GENHD_FL_CD) 242 if (gd->flags & GENHD_FL_CD)
242 return 0; 243 return 0;
243 return -EINVAL; 244 return -EINVAL;
244 } 245 }
245 246
246 default: 247 default:
247 /*printk(KERN_ALERT "ioctl %08x not supported by Xen blkdev\n", 248 /*printk(KERN_ALERT "ioctl %08x not supported by Xen blkdev\n",
248 command);*/ 249 command);*/
249 return -EINVAL; /* same return as native Linux */ 250 return -EINVAL; /* same return as native Linux */
250 } 251 }
251 252
252 return 0; 253 return 0;
253 } 254 }
254 255
255 /* 256 /*
256 * Generate a Xen blkfront IO request from a blk layer request. Reads 257 * Generate a Xen blkfront IO request from a blk layer request. Reads
257 * and writes are handled as expected. 258 * and writes are handled as expected.
258 * 259 *
259 * @req: a request struct 260 * @req: a request struct
260 */ 261 */
261 static int blkif_queue_request(struct request *req) 262 static int blkif_queue_request(struct request *req)
262 { 263 {
263 struct blkfront_info *info = req->rq_disk->private_data; 264 struct blkfront_info *info = req->rq_disk->private_data;
264 unsigned long buffer_mfn; 265 unsigned long buffer_mfn;
265 struct blkif_request *ring_req; 266 struct blkif_request *ring_req;
266 unsigned long id; 267 unsigned long id;
267 unsigned int fsect, lsect; 268 unsigned int fsect, lsect;
268 int i, ref; 269 int i, ref;
269 grant_ref_t gref_head; 270 grant_ref_t gref_head;
270 struct scatterlist *sg; 271 struct scatterlist *sg;
271 272
272 if (unlikely(info->connected != BLKIF_STATE_CONNECTED)) 273 if (unlikely(info->connected != BLKIF_STATE_CONNECTED))
273 return 1; 274 return 1;
274 275
275 if (gnttab_alloc_grant_references( 276 if (gnttab_alloc_grant_references(
276 BLKIF_MAX_SEGMENTS_PER_REQUEST, &gref_head) < 0) { 277 BLKIF_MAX_SEGMENTS_PER_REQUEST, &gref_head) < 0) {
277 gnttab_request_free_callback( 278 gnttab_request_free_callback(
278 &info->callback, 279 &info->callback,
279 blkif_restart_queue_callback, 280 blkif_restart_queue_callback,
280 info, 281 info,
281 BLKIF_MAX_SEGMENTS_PER_REQUEST); 282 BLKIF_MAX_SEGMENTS_PER_REQUEST);
282 return 1; 283 return 1;
283 } 284 }
284 285
285 /* Fill out a communications ring structure. */ 286 /* Fill out a communications ring structure. */
286 ring_req = RING_GET_REQUEST(&info->ring, info->ring.req_prod_pvt); 287 ring_req = RING_GET_REQUEST(&info->ring, info->ring.req_prod_pvt);
287 id = get_id_from_freelist(info); 288 id = get_id_from_freelist(info);
288 info->shadow[id].request = req; 289 info->shadow[id].request = req;
289 290
290 ring_req->id = id; 291 ring_req->u.rw.id = id;
291 ring_req->u.rw.sector_number = (blkif_sector_t)blk_rq_pos(req); 292 ring_req->u.rw.sector_number = (blkif_sector_t)blk_rq_pos(req);
292 ring_req->handle = info->handle; 293 ring_req->u.rw.handle = info->handle;
293 294
294 ring_req->operation = rq_data_dir(req) ? 295 ring_req->operation = rq_data_dir(req) ?
295 BLKIF_OP_WRITE : BLKIF_OP_READ; 296 BLKIF_OP_WRITE : BLKIF_OP_READ;
296 297
297 if (req->cmd_flags & (REQ_FLUSH | REQ_FUA)) { 298 if (req->cmd_flags & (REQ_FLUSH | REQ_FUA)) {
298 /* 299 /*
299 * Ideally we can do an unordered flush-to-disk. In case the 300 * Ideally we can do an unordered flush-to-disk. In case the
300 * backend only supports barriers, use that. A barrier request 301 * backend only supports barriers, use that. A barrier request
301 * is a superset of FUA, so we can implement it the same 302 * is a superset of FUA, so we can implement it the same
302 * way. (It's also a FLUSH+FUA, since it is 303 * way. (It's also a FLUSH+FUA, since it is
303 * guaranteed ordered WRT previous writes.) 304 * guaranteed ordered WRT previous writes.)
304 */ 305 */
305 ring_req->operation = info->flush_op; 306 ring_req->operation = info->flush_op;
306 } 307 }
307 308
308 if (unlikely(req->cmd_flags & REQ_DISCARD)) { 309 if (unlikely(req->cmd_flags & (REQ_DISCARD | REQ_SECURE))) {
309 /* id, sector_number and handle are set above. */ 310 /* id, sector_number and handle are set above. */
310 ring_req->operation = BLKIF_OP_DISCARD; 311 ring_req->operation = BLKIF_OP_DISCARD;
311 ring_req->nr_segments = 0;
312 ring_req->u.discard.nr_sectors = blk_rq_sectors(req); 312 ring_req->u.discard.nr_sectors = blk_rq_sectors(req);
313 if ((req->cmd_flags & REQ_SECURE) && info->feature_secdiscard)
314 ring_req->u.discard.flag = BLKIF_DISCARD_SECURE;
315 else
316 ring_req->u.discard.flag = 0;
313 } else { 317 } else {
314 ring_req->nr_segments = blk_rq_map_sg(req->q, req, info->sg); 318 ring_req->u.rw.nr_segments = blk_rq_map_sg(req->q, req,
315 BUG_ON(ring_req->nr_segments > BLKIF_MAX_SEGMENTS_PER_REQUEST); 319 info->sg);
320 BUG_ON(ring_req->u.rw.nr_segments >
321 BLKIF_MAX_SEGMENTS_PER_REQUEST);
316 322
317 for_each_sg(info->sg, sg, ring_req->nr_segments, i) { 323 for_each_sg(info->sg, sg, ring_req->u.rw.nr_segments, i) {
318 buffer_mfn = pfn_to_mfn(page_to_pfn(sg_page(sg))); 324 buffer_mfn = pfn_to_mfn(page_to_pfn(sg_page(sg)));
319 fsect = sg->offset >> 9; 325 fsect = sg->offset >> 9;
320 lsect = fsect + (sg->length >> 9) - 1; 326 lsect = fsect + (sg->length >> 9) - 1;
321 /* install a grant reference. */ 327 /* install a grant reference. */
322 ref = gnttab_claim_grant_reference(&gref_head); 328 ref = gnttab_claim_grant_reference(&gref_head);
323 BUG_ON(ref == -ENOSPC); 329 BUG_ON(ref == -ENOSPC);
324 330
325 gnttab_grant_foreign_access_ref( 331 gnttab_grant_foreign_access_ref(
326 ref, 332 ref,
327 info->xbdev->otherend_id, 333 info->xbdev->otherend_id,
328 buffer_mfn, 334 buffer_mfn,
329 rq_data_dir(req)); 335 rq_data_dir(req));
330 336
331 info->shadow[id].frame[i] = mfn_to_pfn(buffer_mfn); 337 info->shadow[id].frame[i] = mfn_to_pfn(buffer_mfn);
332 ring_req->u.rw.seg[i] = 338 ring_req->u.rw.seg[i] =
333 (struct blkif_request_segment) { 339 (struct blkif_request_segment) {
334 .gref = ref, 340 .gref = ref,
335 .first_sect = fsect, 341 .first_sect = fsect,
336 .last_sect = lsect }; 342 .last_sect = lsect };
337 } 343 }
338 } 344 }
339 345
340 info->ring.req_prod_pvt++; 346 info->ring.req_prod_pvt++;
341 347
342 /* Keep a private copy so we can reissue requests when recovering. */ 348 /* Keep a private copy so we can reissue requests when recovering. */
343 info->shadow[id].req = *ring_req; 349 info->shadow[id].req = *ring_req;
344 350
345 gnttab_free_grant_references(gref_head); 351 gnttab_free_grant_references(gref_head);
346 352
347 return 0; 353 return 0;
348 } 354 }
349 355
350 356
351 static inline void flush_requests(struct blkfront_info *info) 357 static inline void flush_requests(struct blkfront_info *info)
352 { 358 {
353 int notify; 359 int notify;
354 360
355 RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&info->ring, notify); 361 RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&info->ring, notify);
356 362
357 if (notify) 363 if (notify)
358 notify_remote_via_irq(info->irq); 364 notify_remote_via_irq(info->irq);
359 } 365 }
360 366
361 /* 367 /*
362 * do_blkif_request 368 * do_blkif_request
363 * read a block; request is in a request queue 369 * read a block; request is in a request queue
364 */ 370 */
365 static void do_blkif_request(struct request_queue *rq) 371 static void do_blkif_request(struct request_queue *rq)
366 { 372 {
367 struct blkfront_info *info = NULL; 373 struct blkfront_info *info = NULL;
368 struct request *req; 374 struct request *req;
369 int queued; 375 int queued;
370 376
371 pr_debug("Entered do_blkif_request\n"); 377 pr_debug("Entered do_blkif_request\n");
372 378
373 queued = 0; 379 queued = 0;
374 380
375 while ((req = blk_peek_request(rq)) != NULL) { 381 while ((req = blk_peek_request(rq)) != NULL) {
376 info = req->rq_disk->private_data; 382 info = req->rq_disk->private_data;
377 383
378 if (RING_FULL(&info->ring)) 384 if (RING_FULL(&info->ring))
379 goto wait; 385 goto wait;
380 386
381 blk_start_request(req); 387 blk_start_request(req);
382 388
383 if ((req->cmd_type != REQ_TYPE_FS) || 389 if ((req->cmd_type != REQ_TYPE_FS) ||
384 ((req->cmd_flags & (REQ_FLUSH | REQ_FUA)) && 390 ((req->cmd_flags & (REQ_FLUSH | REQ_FUA)) &&
385 !info->flush_op)) { 391 !info->flush_op)) {
386 __blk_end_request_all(req, -EIO); 392 __blk_end_request_all(req, -EIO);
387 continue; 393 continue;
388 } 394 }
389 395
390 pr_debug("do_blk_req %p: cmd %p, sec %lx, " 396 pr_debug("do_blk_req %p: cmd %p, sec %lx, "
391 "(%u/%u) buffer:%p [%s]\n", 397 "(%u/%u) buffer:%p [%s]\n",
392 req, req->cmd, (unsigned long)blk_rq_pos(req), 398 req, req->cmd, (unsigned long)blk_rq_pos(req),
393 blk_rq_cur_sectors(req), blk_rq_sectors(req), 399 blk_rq_cur_sectors(req), blk_rq_sectors(req),
394 req->buffer, rq_data_dir(req) ? "write" : "read"); 400 req->buffer, rq_data_dir(req) ? "write" : "read");
395 401
396 if (blkif_queue_request(req)) { 402 if (blkif_queue_request(req)) {
397 blk_requeue_request(rq, req); 403 blk_requeue_request(rq, req);
398 wait: 404 wait:
399 /* Avoid pointless unplugs. */ 405 /* Avoid pointless unplugs. */
400 blk_stop_queue(rq); 406 blk_stop_queue(rq);
401 break; 407 break;
402 } 408 }
403 409
404 queued++; 410 queued++;
405 } 411 }
406 412
407 if (queued != 0) 413 if (queued != 0)
408 flush_requests(info); 414 flush_requests(info);
409 } 415 }
410 416
411 static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size) 417 static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size)
412 { 418 {
413 struct request_queue *rq; 419 struct request_queue *rq;
414 struct blkfront_info *info = gd->private_data; 420 struct blkfront_info *info = gd->private_data;
415 421
416 rq = blk_init_queue(do_blkif_request, &blkif_io_lock); 422 rq = blk_init_queue(do_blkif_request, &blkif_io_lock);
417 if (rq == NULL) 423 if (rq == NULL)
418 return -1; 424 return -1;
419 425
420 queue_flag_set_unlocked(QUEUE_FLAG_VIRT, rq); 426 queue_flag_set_unlocked(QUEUE_FLAG_VIRT, rq);
421 427
422 if (info->feature_discard) { 428 if (info->feature_discard) {
423 queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, rq); 429 queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, rq);
424 blk_queue_max_discard_sectors(rq, get_capacity(gd)); 430 blk_queue_max_discard_sectors(rq, get_capacity(gd));
425 rq->limits.discard_granularity = info->discard_granularity; 431 rq->limits.discard_granularity = info->discard_granularity;
426 rq->limits.discard_alignment = info->discard_alignment; 432 rq->limits.discard_alignment = info->discard_alignment;
433 if (info->feature_secdiscard)
434 queue_flag_set_unlocked(QUEUE_FLAG_SECDISCARD, rq);
427 } 435 }
428 436
429 /* Hard sector size and max sectors impersonate the equiv. hardware. */ 437 /* Hard sector size and max sectors impersonate the equiv. hardware. */
430 blk_queue_logical_block_size(rq, sector_size); 438 blk_queue_logical_block_size(rq, sector_size);
431 blk_queue_max_hw_sectors(rq, 512); 439 blk_queue_max_hw_sectors(rq, 512);
432 440
433 /* Each segment in a request is up to an aligned page in size. */ 441 /* Each segment in a request is up to an aligned page in size. */
434 blk_queue_segment_boundary(rq, PAGE_SIZE - 1); 442 blk_queue_segment_boundary(rq, PAGE_SIZE - 1);
435 blk_queue_max_segment_size(rq, PAGE_SIZE); 443 blk_queue_max_segment_size(rq, PAGE_SIZE);
436 444
437 /* Ensure a merged request will fit in a single I/O ring slot. */ 445 /* Ensure a merged request will fit in a single I/O ring slot. */
438 blk_queue_max_segments(rq, BLKIF_MAX_SEGMENTS_PER_REQUEST); 446 blk_queue_max_segments(rq, BLKIF_MAX_SEGMENTS_PER_REQUEST);
439 447
440 /* Make sure buffer addresses are sector-aligned. */ 448 /* Make sure buffer addresses are sector-aligned. */
441 blk_queue_dma_alignment(rq, 511); 449 blk_queue_dma_alignment(rq, 511);
442 450
443 /* Make sure we don't use bounce buffers. */ 451 /* Make sure we don't use bounce buffers. */
444 blk_queue_bounce_limit(rq, BLK_BOUNCE_ANY); 452 blk_queue_bounce_limit(rq, BLK_BOUNCE_ANY);
445 453
446 gd->queue = rq; 454 gd->queue = rq;
447 455
448 return 0; 456 return 0;
449 } 457 }
450 458
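Taken together, the limits set above keep a merged request inside a single ring slot. A rough back-of-the-envelope, assuming 4 KiB pages and the usual BLKIF_MAX_SEGMENTS_PER_REQUEST of 11 (the value is not shown in this hunk):

/* max_hw_sectors(512)  -> at most 512 * 512 B = 256 KiB per request
 * each segment         -> at most PAGE_SIZE (4 KiB), page aligned
 * max_segments(11)     -> at most 11 * 4 KiB = 44 KiB actually usable,
 *                         so the segment count, not max_hw_sectors,
 *                         is the binding limit per ring slot. */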
451 459
452 static void xlvbd_flush(struct blkfront_info *info) 460 static void xlvbd_flush(struct blkfront_info *info)
453 { 461 {
454 blk_queue_flush(info->rq, info->feature_flush); 462 blk_queue_flush(info->rq, info->feature_flush);
455 printk(KERN_INFO "blkfront: %s: %s: %s\n", 463 printk(KERN_INFO "blkfront: %s: %s: %s\n",
456 info->gd->disk_name, 464 info->gd->disk_name,
457 info->flush_op == BLKIF_OP_WRITE_BARRIER ? 465 info->flush_op == BLKIF_OP_WRITE_BARRIER ?
458 "barrier" : (info->flush_op == BLKIF_OP_FLUSH_DISKCACHE ? 466 "barrier" : (info->flush_op == BLKIF_OP_FLUSH_DISKCACHE ?
459 "flush diskcache" : "barrier or flush"), 467 "flush diskcache" : "barrier or flush"),
460 info->feature_flush ? "enabled" : "disabled"); 468 info->feature_flush ? "enabled" : "disabled");
461 } 469 }
462 470
463 static int xen_translate_vdev(int vdevice, int *minor, unsigned int *offset) 471 static int xen_translate_vdev(int vdevice, int *minor, unsigned int *offset)
464 { 472 {
465 int major; 473 int major;
466 major = BLKIF_MAJOR(vdevice); 474 major = BLKIF_MAJOR(vdevice);
467 *minor = BLKIF_MINOR(vdevice); 475 *minor = BLKIF_MINOR(vdevice);
468 switch (major) { 476 switch (major) {
469 case XEN_IDE0_MAJOR: 477 case XEN_IDE0_MAJOR:
470 *offset = (*minor / 64) + EMULATED_HD_DISK_NAME_OFFSET; 478 *offset = (*minor / 64) + EMULATED_HD_DISK_NAME_OFFSET;
471 *minor = ((*minor / 64) * PARTS_PER_DISK) + 479 *minor = ((*minor / 64) * PARTS_PER_DISK) +
472 EMULATED_HD_DISK_MINOR_OFFSET; 480 EMULATED_HD_DISK_MINOR_OFFSET;
473 break; 481 break;
474 case XEN_IDE1_MAJOR: 482 case XEN_IDE1_MAJOR:
475 *offset = (*minor / 64) + 2 + EMULATED_HD_DISK_NAME_OFFSET; 483 *offset = (*minor / 64) + 2 + EMULATED_HD_DISK_NAME_OFFSET;
476 *minor = (((*minor / 64) + 2) * PARTS_PER_DISK) + 484 *minor = (((*minor / 64) + 2) * PARTS_PER_DISK) +
477 EMULATED_HD_DISK_MINOR_OFFSET; 485 EMULATED_HD_DISK_MINOR_OFFSET;
478 break; 486 break;
479 case XEN_SCSI_DISK0_MAJOR: 487 case XEN_SCSI_DISK0_MAJOR:
480 *offset = (*minor / PARTS_PER_DISK) + EMULATED_SD_DISK_NAME_OFFSET; 488 *offset = (*minor / PARTS_PER_DISK) + EMULATED_SD_DISK_NAME_OFFSET;
481 *minor = *minor + EMULATED_SD_DISK_MINOR_OFFSET; 489 *minor = *minor + EMULATED_SD_DISK_MINOR_OFFSET;
482 break; 490 break;
483 case XEN_SCSI_DISK1_MAJOR: 491 case XEN_SCSI_DISK1_MAJOR:
484 case XEN_SCSI_DISK2_MAJOR: 492 case XEN_SCSI_DISK2_MAJOR:
485 case XEN_SCSI_DISK3_MAJOR: 493 case XEN_SCSI_DISK3_MAJOR:
486 case XEN_SCSI_DISK4_MAJOR: 494 case XEN_SCSI_DISK4_MAJOR:
487 case XEN_SCSI_DISK5_MAJOR: 495 case XEN_SCSI_DISK5_MAJOR:
488 case XEN_SCSI_DISK6_MAJOR: 496 case XEN_SCSI_DISK6_MAJOR:
489 case XEN_SCSI_DISK7_MAJOR: 497 case XEN_SCSI_DISK7_MAJOR:
490 *offset = (*minor / PARTS_PER_DISK) + 498 *offset = (*minor / PARTS_PER_DISK) +
491 ((major - XEN_SCSI_DISK1_MAJOR + 1) * 16) + 499 ((major - XEN_SCSI_DISK1_MAJOR + 1) * 16) +
492 EMULATED_SD_DISK_NAME_OFFSET; 500 EMULATED_SD_DISK_NAME_OFFSET;
493 *minor = *minor + 501 *minor = *minor +
494 ((major - XEN_SCSI_DISK1_MAJOR + 1) * 16 * PARTS_PER_DISK) + 502 ((major - XEN_SCSI_DISK1_MAJOR + 1) * 16 * PARTS_PER_DISK) +
495 EMULATED_SD_DISK_MINOR_OFFSET; 503 EMULATED_SD_DISK_MINOR_OFFSET;
496 break; 504 break;
497 case XEN_SCSI_DISK8_MAJOR: 505 case XEN_SCSI_DISK8_MAJOR:
498 case XEN_SCSI_DISK9_MAJOR: 506 case XEN_SCSI_DISK9_MAJOR:
499 case XEN_SCSI_DISK10_MAJOR: 507 case XEN_SCSI_DISK10_MAJOR:
500 case XEN_SCSI_DISK11_MAJOR: 508 case XEN_SCSI_DISK11_MAJOR:
501 case XEN_SCSI_DISK12_MAJOR: 509 case XEN_SCSI_DISK12_MAJOR:
502 case XEN_SCSI_DISK13_MAJOR: 510 case XEN_SCSI_DISK13_MAJOR:
503 case XEN_SCSI_DISK14_MAJOR: 511 case XEN_SCSI_DISK14_MAJOR:
504 case XEN_SCSI_DISK15_MAJOR: 512 case XEN_SCSI_DISK15_MAJOR:
505 *offset = (*minor / PARTS_PER_DISK) + 513 *offset = (*minor / PARTS_PER_DISK) +
506 ((major - XEN_SCSI_DISK8_MAJOR + 8) * 16) + 514 ((major - XEN_SCSI_DISK8_MAJOR + 8) * 16) +
507 EMULATED_SD_DISK_NAME_OFFSET; 515 EMULATED_SD_DISK_NAME_OFFSET;
508 *minor = *minor + 516 *minor = *minor +
509 ((major - XEN_SCSI_DISK8_MAJOR + 8) * 16 * PARTS_PER_DISK) + 517 ((major - XEN_SCSI_DISK8_MAJOR + 8) * 16 * PARTS_PER_DISK) +
510 EMULATED_SD_DISK_MINOR_OFFSET; 518 EMULATED_SD_DISK_MINOR_OFFSET;
511 break; 519 break;
512 case XENVBD_MAJOR: 520 case XENVBD_MAJOR:
513 *offset = *minor / PARTS_PER_DISK; 521 *offset = *minor / PARTS_PER_DISK;
514 break; 522 break;
515 default: 523 default:
516 printk(KERN_WARNING "blkfront: your disk configuration is " 524 printk(KERN_WARNING "blkfront: your disk configuration is "
517 "incorrect, please use an xvd device instead\n"); 525 "incorrect, please use an xvd device instead\n");
518 return -ENODEV; 526 return -ENODEV;
519 } 527 }
520 return 0; 528 return 0;
521 } 529 }
522 530
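Two worked translations may help; the device numbers assume the usual Linux IDE/SCSI majors (3 and 8) and are illustrative only:

/* vdevice 0x300 (hda: major 3, minor 0):
 *   offset = 0 / 64 + EMULATED_HD_DISK_NAME_OFFSET = 0  -> "xvda"
 *   minor  = (0 / 64) * PARTS_PER_DISK + 0         = 0
 *
 * vdevice 0x820 (sdc: major 8, minor 32):
 *   offset = 32 / PARTS_PER_DISK + EMULATED_SD_DISK_NAME_OFFSET = 2  -> "xvdc"
 *   minor  = 32 + EMULATED_SD_DISK_MINOR_OFFSET                 = 32 */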
523 static int xlvbd_alloc_gendisk(blkif_sector_t capacity, 531 static int xlvbd_alloc_gendisk(blkif_sector_t capacity,
524 struct blkfront_info *info, 532 struct blkfront_info *info,
525 u16 vdisk_info, u16 sector_size) 533 u16 vdisk_info, u16 sector_size)
526 { 534 {
527 struct gendisk *gd; 535 struct gendisk *gd;
528 int nr_minors = 1; 536 int nr_minors = 1;
529 int err; 537 int err;
530 unsigned int offset; 538 unsigned int offset;
531 int minor; 539 int minor;
532 int nr_parts; 540 int nr_parts;
533 541
534 BUG_ON(info->gd != NULL); 542 BUG_ON(info->gd != NULL);
535 BUG_ON(info->rq != NULL); 543 BUG_ON(info->rq != NULL);
536 544
537 if ((info->vdevice>>EXT_SHIFT) > 1) { 545 if ((info->vdevice>>EXT_SHIFT) > 1) {
538 /* this is above the extended range; something is wrong */ 546 /* this is above the extended range; something is wrong */
539 printk(KERN_WARNING "blkfront: vdevice 0x%x is above the extended range; ignoring\n", info->vdevice); 547 printk(KERN_WARNING "blkfront: vdevice 0x%x is above the extended range; ignoring\n", info->vdevice);
540 return -ENODEV; 548 return -ENODEV;
541 } 549 }
542 550
543 if (!VDEV_IS_EXTENDED(info->vdevice)) { 551 if (!VDEV_IS_EXTENDED(info->vdevice)) {
544 err = xen_translate_vdev(info->vdevice, &minor, &offset); 552 err = xen_translate_vdev(info->vdevice, &minor, &offset);
545 if (err) 553 if (err)
546 return err; 554 return err;
547 nr_parts = PARTS_PER_DISK; 555 nr_parts = PARTS_PER_DISK;
548 } else { 556 } else {
549 minor = BLKIF_MINOR_EXT(info->vdevice); 557 minor = BLKIF_MINOR_EXT(info->vdevice);
550 nr_parts = PARTS_PER_EXT_DISK; 558 nr_parts = PARTS_PER_EXT_DISK;
551 offset = minor / nr_parts; 559 offset = minor / nr_parts;
552 if (xen_hvm_domain() && offset < EMULATED_HD_DISK_NAME_OFFSET + 4) 560 if (xen_hvm_domain() && offset < EMULATED_HD_DISK_NAME_OFFSET + 4)
553 printk(KERN_WARNING "blkfront: vdevice 0x%x might conflict with " 561 printk(KERN_WARNING "blkfront: vdevice 0x%x might conflict with "
554 "emulated IDE disks,\n\t choose an xvd device name" 562 "emulated IDE disks,\n\t choose an xvd device name"
555 "from xvde on\n", info->vdevice); 563 "from xvde on\n", info->vdevice);
556 } 564 }
557 err = -ENODEV; 565 err = -ENODEV;
558 566
559 if ((minor % nr_parts) == 0) 567 if ((minor % nr_parts) == 0)
560 nr_minors = nr_parts; 568 nr_minors = nr_parts;
561 569
562 err = xlbd_reserve_minors(minor, nr_minors); 570 err = xlbd_reserve_minors(minor, nr_minors);
563 if (err) 571 if (err)
564 goto out; 572 goto out;
565 err = -ENODEV; 573 err = -ENODEV;
566 574
567 gd = alloc_disk(nr_minors); 575 gd = alloc_disk(nr_minors);
568 if (gd == NULL) 576 if (gd == NULL)
569 goto release; 577 goto release;
570 578
571 if (nr_minors > 1) { 579 if (nr_minors > 1) {
572 if (offset < 26) 580 if (offset < 26)
573 sprintf(gd->disk_name, "%s%c", DEV_NAME, 'a' + offset); 581 sprintf(gd->disk_name, "%s%c", DEV_NAME, 'a' + offset);
574 else 582 else
575 sprintf(gd->disk_name, "%s%c%c", DEV_NAME, 583 sprintf(gd->disk_name, "%s%c%c", DEV_NAME,
576 'a' + ((offset / 26)-1), 'a' + (offset % 26)); 584 'a' + ((offset / 26)-1), 'a' + (offset % 26));
577 } else { 585 } else {
578 if (offset < 26) 586 if (offset < 26)
579 sprintf(gd->disk_name, "%s%c%d", DEV_NAME, 587 sprintf(gd->disk_name, "%s%c%d", DEV_NAME,
580 'a' + offset, 588 'a' + offset,
581 minor & (nr_parts - 1)); 589 minor & (nr_parts - 1));
582 else 590 else
583 sprintf(gd->disk_name, "%s%c%c%d", DEV_NAME, 591 sprintf(gd->disk_name, "%s%c%c%d", DEV_NAME,
584 'a' + ((offset / 26) - 1), 592 'a' + ((offset / 26) - 1),
585 'a' + (offset % 26), 593 'a' + (offset % 26),
586 minor & (nr_parts - 1)); 594 minor & (nr_parts - 1));
587 } 595 }
588 596
589 gd->major = XENVBD_MAJOR; 597 gd->major = XENVBD_MAJOR;
590 gd->first_minor = minor; 598 gd->first_minor = minor;
591 gd->fops = &xlvbd_block_fops; 599 gd->fops = &xlvbd_block_fops;
592 gd->private_data = info; 600 gd->private_data = info;
593 gd->driverfs_dev = &(info->xbdev->dev); 601 gd->driverfs_dev = &(info->xbdev->dev);
594 set_capacity(gd, capacity); 602 set_capacity(gd, capacity);
595 603
596 if (xlvbd_init_blk_queue(gd, sector_size)) { 604 if (xlvbd_init_blk_queue(gd, sector_size)) {
597 del_gendisk(gd); 605 del_gendisk(gd);
598 goto release; 606 goto release;
599 } 607 }
600 608
601 info->rq = gd->queue; 609 info->rq = gd->queue;
602 info->gd = gd; 610 info->gd = gd;
603 611
604 xlvbd_flush(info); 612 xlvbd_flush(info);
605 613
606 if (vdisk_info & VDISK_READONLY) 614 if (vdisk_info & VDISK_READONLY)
607 set_disk_ro(gd, 1); 615 set_disk_ro(gd, 1);
608 616
609 if (vdisk_info & VDISK_REMOVABLE) 617 if (vdisk_info & VDISK_REMOVABLE)
610 gd->flags |= GENHD_FL_REMOVABLE; 618 gd->flags |= GENHD_FL_REMOVABLE;
611 619
612 if (vdisk_info & VDISK_CDROM) 620 if (vdisk_info & VDISK_CDROM)
613 gd->flags |= GENHD_FL_CD; 621 gd->flags |= GENHD_FL_CD;
614 622
615 return 0; 623 return 0;
616 624
617 release: 625 release:
618 xlbd_release_minors(minor, nr_minors); 626 xlbd_release_minors(minor, nr_minors);
619 out: 627 out:
620 return err; 628 return err;
621 } 629 }
622 630
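The naming logic above follows the usual one-or-two letter suffix scheme; a few example offsets, for illustration:

/* offset  0 -> "xvda"
 * offset 25 -> "xvdz"
 * offset 26 -> "xvdaa"   ('a' + (26/26 - 1), 'a' + 26 % 26)
 * offset 27 -> "xvdab"
 * For a non-whole-disk node the partition number
 * (minor & (nr_parts - 1)) is appended, e.g. "xvda1". */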
623 static void xlvbd_release_gendisk(struct blkfront_info *info) 631 static void xlvbd_release_gendisk(struct blkfront_info *info)
624 { 632 {
625 unsigned int minor, nr_minors; 633 unsigned int minor, nr_minors;
626 unsigned long flags; 634 unsigned long flags;
627 635
628 if (info->rq == NULL) 636 if (info->rq == NULL)
629 return; 637 return;
630 638
631 spin_lock_irqsave(&blkif_io_lock, flags); 639 spin_lock_irqsave(&blkif_io_lock, flags);
632 640
633 /* No more blkif_request(). */ 641 /* No more blkif_request(). */
634 blk_stop_queue(info->rq); 642 blk_stop_queue(info->rq);
635 643
636 /* No more gnttab callback work. */ 644 /* No more gnttab callback work. */
637 gnttab_cancel_free_callback(&info->callback); 645 gnttab_cancel_free_callback(&info->callback);
638 spin_unlock_irqrestore(&blkif_io_lock, flags); 646 spin_unlock_irqrestore(&blkif_io_lock, flags);
639 647
640 /* Flush gnttab callback work. Must be done with no locks held. */ 648 /* Flush gnttab callback work. Must be done with no locks held. */
641 flush_work_sync(&info->work); 649 flush_work_sync(&info->work);
642 650
643 del_gendisk(info->gd); 651 del_gendisk(info->gd);
644 652
645 minor = info->gd->first_minor; 653 minor = info->gd->first_minor;
646 nr_minors = info->gd->minors; 654 nr_minors = info->gd->minors;
647 xlbd_release_minors(minor, nr_minors); 655 xlbd_release_minors(minor, nr_minors);
648 656
649 blk_cleanup_queue(info->rq); 657 blk_cleanup_queue(info->rq);
650 info->rq = NULL; 658 info->rq = NULL;
651 659
652 put_disk(info->gd); 660 put_disk(info->gd);
653 info->gd = NULL; 661 info->gd = NULL;
654 } 662 }
655 663
656 static void kick_pending_request_queues(struct blkfront_info *info) 664 static void kick_pending_request_queues(struct blkfront_info *info)
657 { 665 {
658 if (!RING_FULL(&info->ring)) { 666 if (!RING_FULL(&info->ring)) {
659 /* Re-enable calldowns. */ 667 /* Re-enable calldowns. */
660 blk_start_queue(info->rq); 668 blk_start_queue(info->rq);
661 /* Kick things off immediately. */ 669 /* Kick things off immediately. */
662 do_blkif_request(info->rq); 670 do_blkif_request(info->rq);
663 } 671 }
664 } 672 }
665 673
666 static void blkif_restart_queue(struct work_struct *work) 674 static void blkif_restart_queue(struct work_struct *work)
667 { 675 {
668 struct blkfront_info *info = container_of(work, struct blkfront_info, work); 676 struct blkfront_info *info = container_of(work, struct blkfront_info, work);
669 677
670 spin_lock_irq(&blkif_io_lock); 678 spin_lock_irq(&blkif_io_lock);
671 if (info->connected == BLKIF_STATE_CONNECTED) 679 if (info->connected == BLKIF_STATE_CONNECTED)
672 kick_pending_request_queues(info); 680 kick_pending_request_queues(info);
673 spin_unlock_irq(&blkif_io_lock); 681 spin_unlock_irq(&blkif_io_lock);
674 } 682 }
675 683
676 static void blkif_free(struct blkfront_info *info, int suspend) 684 static void blkif_free(struct blkfront_info *info, int suspend)
677 { 685 {
678 /* Prevent new requests being issued until we fix things up. */ 686 /* Prevent new requests being issued until we fix things up. */
679 spin_lock_irq(&blkif_io_lock); 687 spin_lock_irq(&blkif_io_lock);
680 info->connected = suspend ? 688 info->connected = suspend ?
681 BLKIF_STATE_SUSPENDED : BLKIF_STATE_DISCONNECTED; 689 BLKIF_STATE_SUSPENDED : BLKIF_STATE_DISCONNECTED;
682 /* No more blkif_request(). */ 690 /* No more blkif_request(). */
683 if (info->rq) 691 if (info->rq)
684 blk_stop_queue(info->rq); 692 blk_stop_queue(info->rq);
685 /* No more gnttab callback work. */ 693 /* No more gnttab callback work. */
686 gnttab_cancel_free_callback(&info->callback); 694 gnttab_cancel_free_callback(&info->callback);
687 spin_unlock_irq(&blkif_io_lock); 695 spin_unlock_irq(&blkif_io_lock);
688 696
689 /* Flush gnttab callback work. Must be done with no locks held. */ 697 /* Flush gnttab callback work. Must be done with no locks held. */
690 flush_work_sync(&info->work); 698 flush_work_sync(&info->work);
691 699
692 /* Free resources associated with old device channel. */ 700 /* Free resources associated with old device channel. */
693 if (info->ring_ref != GRANT_INVALID_REF) { 701 if (info->ring_ref != GRANT_INVALID_REF) {
694 gnttab_end_foreign_access(info->ring_ref, 0, 702 gnttab_end_foreign_access(info->ring_ref, 0,
695 (unsigned long)info->ring.sring); 703 (unsigned long)info->ring.sring);
696 info->ring_ref = GRANT_INVALID_REF; 704 info->ring_ref = GRANT_INVALID_REF;
697 info->ring.sring = NULL; 705 info->ring.sring = NULL;
698 } 706 }
699 if (info->irq) 707 if (info->irq)
700 unbind_from_irqhandler(info->irq, info); 708 unbind_from_irqhandler(info->irq, info);
701 info->evtchn = info->irq = 0; 709 info->evtchn = info->irq = 0;
702 710
703 } 711 }
704 712
705 static void blkif_completion(struct blk_shadow *s) 713 static void blkif_completion(struct blk_shadow *s)
706 { 714 {
707 int i; 715 int i;
708 for (i = 0; i < s->req.nr_segments; i++) 716 /* Do not use for BLKIF_OP_DISCARD: nr_segments shares its slot
717 * with the discard flag. */
718 for (i = 0; i < s->req.u.rw.nr_segments; i++)
709 gnttab_end_foreign_access(s->req.u.rw.seg[i].gref, 0, 0UL); 719 gnttab_end_foreign_access(s->req.u.rw.seg[i].gref, 0, 0UL);
710 } 720 }
711 721
712 static irqreturn_t blkif_interrupt(int irq, void *dev_id) 722 static irqreturn_t blkif_interrupt(int irq, void *dev_id)
713 { 723 {
714 struct request *req; 724 struct request *req;
715 struct blkif_response *bret; 725 struct blkif_response *bret;
716 RING_IDX i, rp; 726 RING_IDX i, rp;
717 unsigned long flags; 727 unsigned long flags;
718 struct blkfront_info *info = (struct blkfront_info *)dev_id; 728 struct blkfront_info *info = (struct blkfront_info *)dev_id;
719 int error; 729 int error;
720 730
721 spin_lock_irqsave(&blkif_io_lock, flags); 731 spin_lock_irqsave(&blkif_io_lock, flags);
722 732
723 if (unlikely(info->connected != BLKIF_STATE_CONNECTED)) { 733 if (unlikely(info->connected != BLKIF_STATE_CONNECTED)) {
724 spin_unlock_irqrestore(&blkif_io_lock, flags); 734 spin_unlock_irqrestore(&blkif_io_lock, flags);
725 return IRQ_HANDLED; 735 return IRQ_HANDLED;
726 } 736 }
727 737
728 again: 738 again:
729 rp = info->ring.sring->rsp_prod; 739 rp = info->ring.sring->rsp_prod;
730 rmb(); /* Ensure we see queued responses up to 'rp'. */ 740 rmb(); /* Ensure we see queued responses up to 'rp'. */
731 741
732 for (i = info->ring.rsp_cons; i != rp; i++) { 742 for (i = info->ring.rsp_cons; i != rp; i++) {
733 unsigned long id; 743 unsigned long id;
734 744
735 bret = RING_GET_RESPONSE(&info->ring, i); 745 bret = RING_GET_RESPONSE(&info->ring, i);
736 id = bret->id; 746 id = bret->id;
737 req = info->shadow[id].request; 747 req = info->shadow[id].request;
738 748
739 blkif_completion(&info->shadow[id]); 749 if (bret->operation != BLKIF_OP_DISCARD)
750 blkif_completion(&info->shadow[id]);
740 751
741 add_id_to_freelist(info, id); 752 add_id_to_freelist(info, id);
742 753
743 error = (bret->status == BLKIF_RSP_OKAY) ? 0 : -EIO; 754 error = (bret->status == BLKIF_RSP_OKAY) ? 0 : -EIO;
744 switch (bret->operation) { 755 switch (bret->operation) {
745 case BLKIF_OP_DISCARD: 756 case BLKIF_OP_DISCARD:
746 if (unlikely(bret->status == BLKIF_RSP_EOPNOTSUPP)) { 757 if (unlikely(bret->status == BLKIF_RSP_EOPNOTSUPP)) {
747 struct request_queue *rq = info->rq; 758 struct request_queue *rq = info->rq;
748 printk(KERN_WARNING "blkfront: %s: discard op failed\n", 759 printk(KERN_WARNING "blkfront: %s: discard op failed\n",
749 info->gd->disk_name); 760 info->gd->disk_name);
750 error = -EOPNOTSUPP; 761 error = -EOPNOTSUPP;
751 info->feature_discard = 0; 762 info->feature_discard = 0;
763 info->feature_secdiscard = 0;
752 queue_flag_clear(QUEUE_FLAG_DISCARD, rq); 764 queue_flag_clear(QUEUE_FLAG_DISCARD, rq);
765 queue_flag_clear(QUEUE_FLAG_SECDISCARD, rq);
753 } 766 }
754 __blk_end_request_all(req, error); 767 __blk_end_request_all(req, error);
755 break; 768 break;
756 case BLKIF_OP_FLUSH_DISKCACHE: 769 case BLKIF_OP_FLUSH_DISKCACHE:
757 case BLKIF_OP_WRITE_BARRIER: 770 case BLKIF_OP_WRITE_BARRIER:
758 if (unlikely(bret->status == BLKIF_RSP_EOPNOTSUPP)) { 771 if (unlikely(bret->status == BLKIF_RSP_EOPNOTSUPP)) {
759 printk(KERN_WARNING "blkfront: %s: write %s op failed\n", 772 printk(KERN_WARNING "blkfront: %s: write %s op failed\n",
760 info->flush_op == BLKIF_OP_WRITE_BARRIER ? 773 info->flush_op == BLKIF_OP_WRITE_BARRIER ?
761 "barrier" : "flush disk cache", 774 "barrier" : "flush disk cache",
762 info->gd->disk_name); 775 info->gd->disk_name);
763 error = -EOPNOTSUPP; 776 error = -EOPNOTSUPP;
764 } 777 }
765 if (unlikely(bret->status == BLKIF_RSP_ERROR && 778 if (unlikely(bret->status == BLKIF_RSP_ERROR &&
766 info->shadow[id].req.nr_segments == 0)) { 779 info->shadow[id].req.u.rw.nr_segments == 0)) {
767 printk(KERN_WARNING "blkfront: %s: empty write %s op failed\n", 780 printk(KERN_WARNING "blkfront: %s: empty write %s op failed\n",
768 info->flush_op == BLKIF_OP_WRITE_BARRIER ? 781 info->flush_op == BLKIF_OP_WRITE_BARRIER ?
769 "barrier" : "flush disk cache", 782 "barrier" : "flush disk cache",
770 info->gd->disk_name); 783 info->gd->disk_name);
771 error = -EOPNOTSUPP; 784 error = -EOPNOTSUPP;
772 } 785 }
773 if (unlikely(error)) { 786 if (unlikely(error)) {
774 if (error == -EOPNOTSUPP) 787 if (error == -EOPNOTSUPP)
775 error = 0; 788 error = 0;
776 info->feature_flush = 0; 789 info->feature_flush = 0;
777 info->flush_op = 0; 790 info->flush_op = 0;
778 xlvbd_flush(info); 791 xlvbd_flush(info);
779 } 792 }
780 /* fall through */ 793 /* fall through */
781 case BLKIF_OP_READ: 794 case BLKIF_OP_READ:
782 case BLKIF_OP_WRITE: 795 case BLKIF_OP_WRITE:
783 if (unlikely(bret->status != BLKIF_RSP_OKAY)) 796 if (unlikely(bret->status != BLKIF_RSP_OKAY))
784 dev_dbg(&info->xbdev->dev, "Bad return from blkdev data " 797 dev_dbg(&info->xbdev->dev, "Bad return from blkdev data "
785 "request: %x\n", bret->status); 798 "request: %x\n", bret->status);
786 799
787 __blk_end_request_all(req, error); 800 __blk_end_request_all(req, error);
788 break; 801 break;
789 default: 802 default:
790 BUG(); 803 BUG();
791 } 804 }
792 } 805 }
793 806
794 info->ring.rsp_cons = i; 807 info->ring.rsp_cons = i;
795 808
796 if (i != info->ring.req_prod_pvt) { 809 if (i != info->ring.req_prod_pvt) {
797 int more_to_do; 810 int more_to_do;
798 RING_FINAL_CHECK_FOR_RESPONSES(&info->ring, more_to_do); 811 RING_FINAL_CHECK_FOR_RESPONSES(&info->ring, more_to_do);
799 if (more_to_do) 812 if (more_to_do)
800 goto again; 813 goto again;
801 } else 814 } else
802 info->ring.sring->rsp_event = i + 1; 815 info->ring.sring->rsp_event = i + 1;
803 816
804 kick_pending_request_queues(info); 817 kick_pending_request_queues(info);
805 818
806 spin_unlock_irqrestore(&blkif_io_lock, flags); 819 spin_unlock_irqrestore(&blkif_io_lock, flags);
807 820
808 return IRQ_HANDLED; 821 return IRQ_HANDLED;
809 } 822 }
810 823
811 824
812 static int setup_blkring(struct xenbus_device *dev, 825 static int setup_blkring(struct xenbus_device *dev,
813 struct blkfront_info *info) 826 struct blkfront_info *info)
814 { 827 {
815 struct blkif_sring *sring; 828 struct blkif_sring *sring;
816 int err; 829 int err;
817 830
818 info->ring_ref = GRANT_INVALID_REF; 831 info->ring_ref = GRANT_INVALID_REF;
819 832
820 sring = (struct blkif_sring *)__get_free_page(GFP_NOIO | __GFP_HIGH); 833 sring = (struct blkif_sring *)__get_free_page(GFP_NOIO | __GFP_HIGH);
821 if (!sring) { 834 if (!sring) {
822 xenbus_dev_fatal(dev, -ENOMEM, "allocating shared ring"); 835 xenbus_dev_fatal(dev, -ENOMEM, "allocating shared ring");
823 return -ENOMEM; 836 return -ENOMEM;
824 } 837 }
825 SHARED_RING_INIT(sring); 838 SHARED_RING_INIT(sring);
826 FRONT_RING_INIT(&info->ring, sring, PAGE_SIZE); 839 FRONT_RING_INIT(&info->ring, sring, PAGE_SIZE);
827 840
828 sg_init_table(info->sg, BLKIF_MAX_SEGMENTS_PER_REQUEST); 841 sg_init_table(info->sg, BLKIF_MAX_SEGMENTS_PER_REQUEST);
829 842
830 err = xenbus_grant_ring(dev, virt_to_mfn(info->ring.sring)); 843 err = xenbus_grant_ring(dev, virt_to_mfn(info->ring.sring));
831 if (err < 0) { 844 if (err < 0) {
832 free_page((unsigned long)sring); 845 free_page((unsigned long)sring);
833 info->ring.sring = NULL; 846 info->ring.sring = NULL;
834 goto fail; 847 goto fail;
835 } 848 }
836 info->ring_ref = err; 849 info->ring_ref = err;
837 850
838 err = xenbus_alloc_evtchn(dev, &info->evtchn); 851 err = xenbus_alloc_evtchn(dev, &info->evtchn);
839 if (err) 852 if (err)
840 goto fail; 853 goto fail;
841 854
842 err = bind_evtchn_to_irqhandler(info->evtchn, 855 err = bind_evtchn_to_irqhandler(info->evtchn,
843 blkif_interrupt, 856 blkif_interrupt,
844 IRQF_SAMPLE_RANDOM, "blkif", info); 857 IRQF_SAMPLE_RANDOM, "blkif", info);
845 if (err <= 0) { 858 if (err <= 0) {
846 xenbus_dev_fatal(dev, err, 859 xenbus_dev_fatal(dev, err,
847 "bind_evtchn_to_irqhandler failed"); 860 "bind_evtchn_to_irqhandler failed");
848 goto fail; 861 goto fail;
849 } 862 }
850 info->irq = err; 863 info->irq = err;
851 864
852 return 0; 865 return 0;
853 fail: 866 fail:
854 blkif_free(info, 0); 867 blkif_free(info, 0);
855 return err; 868 return err;
856 } 869 }
857 870
858 871
859 /* Common code used when first setting up, and when resuming. */ 872 /* Common code used when first setting up, and when resuming. */
860 static int talk_to_blkback(struct xenbus_device *dev, 873 static int talk_to_blkback(struct xenbus_device *dev,
861 struct blkfront_info *info) 874 struct blkfront_info *info)
862 { 875 {
863 const char *message = NULL; 876 const char *message = NULL;
864 struct xenbus_transaction xbt; 877 struct xenbus_transaction xbt;
865 int err; 878 int err;
866 879
867 /* Create shared ring, alloc event channel. */ 880 /* Create shared ring, alloc event channel. */
868 err = setup_blkring(dev, info); 881 err = setup_blkring(dev, info);
869 if (err) 882 if (err)
870 goto out; 883 goto out;
871 884
872 again: 885 again:
873 err = xenbus_transaction_start(&xbt); 886 err = xenbus_transaction_start(&xbt);
874 if (err) { 887 if (err) {
875 xenbus_dev_fatal(dev, err, "starting transaction"); 888 xenbus_dev_fatal(dev, err, "starting transaction");
876 goto destroy_blkring; 889 goto destroy_blkring;
877 } 890 }
878 891
879 err = xenbus_printf(xbt, dev->nodename, 892 err = xenbus_printf(xbt, dev->nodename,
880 "ring-ref", "%u", info->ring_ref); 893 "ring-ref", "%u", info->ring_ref);
881 if (err) { 894 if (err) {
882 message = "writing ring-ref"; 895 message = "writing ring-ref";
883 goto abort_transaction; 896 goto abort_transaction;
884 } 897 }
885 err = xenbus_printf(xbt, dev->nodename, 898 err = xenbus_printf(xbt, dev->nodename,
886 "event-channel", "%u", info->evtchn); 899 "event-channel", "%u", info->evtchn);
887 if (err) { 900 if (err) {
888 message = "writing event-channel"; 901 message = "writing event-channel";
889 goto abort_transaction; 902 goto abort_transaction;
890 } 903 }
891 err = xenbus_printf(xbt, dev->nodename, "protocol", "%s", 904 err = xenbus_printf(xbt, dev->nodename, "protocol", "%s",
892 XEN_IO_PROTO_ABI_NATIVE); 905 XEN_IO_PROTO_ABI_NATIVE);
893 if (err) { 906 if (err) {
894 message = "writing protocol"; 907 message = "writing protocol";
895 goto abort_transaction; 908 goto abort_transaction;
896 } 909 }
897 910
898 err = xenbus_transaction_end(xbt, 0); 911 err = xenbus_transaction_end(xbt, 0);
899 if (err) { 912 if (err) {
900 if (err == -EAGAIN) 913 if (err == -EAGAIN)
901 goto again; 914 goto again;
902 xenbus_dev_fatal(dev, err, "completing transaction"); 915 xenbus_dev_fatal(dev, err, "completing transaction");
903 goto destroy_blkring; 916 goto destroy_blkring;
904 } 917 }
905 918
906 xenbus_switch_state(dev, XenbusStateInitialised); 919 xenbus_switch_state(dev, XenbusStateInitialised);
907 920
908 return 0; 921 return 0;
909 922
910 abort_transaction: 923 abort_transaction:
911 xenbus_transaction_end(xbt, 1); 924 xenbus_transaction_end(xbt, 1);
912 if (message) 925 if (message)
913 xenbus_dev_fatal(dev, err, "%s", message); 926 xenbus_dev_fatal(dev, err, "%s", message);
914 destroy_blkring: 927 destroy_blkring:
915 blkif_free(info, 0); 928 blkif_free(info, 0);
916 out: 929 out:
917 return err; 930 return err;
918 } 931 }
919 932
920 /** 933 /**
921 * Entry point to this code when a new device is created. Allocate the basic 934 * Entry point to this code when a new device is created. Allocate the basic
922 * structures and the ring buffer for communication with the backend, and 935 * structures and the ring buffer for communication with the backend, and
923 * inform the backend of the appropriate details for those. Switch to 936 * inform the backend of the appropriate details for those. Switch to
924 * Initialised state. 937 * Initialised state.
925 */ 938 */
926 static int blkfront_probe(struct xenbus_device *dev, 939 static int blkfront_probe(struct xenbus_device *dev,
927 const struct xenbus_device_id *id) 940 const struct xenbus_device_id *id)
928 { 941 {
929 int err, vdevice, i; 942 int err, vdevice, i;
930 struct blkfront_info *info; 943 struct blkfront_info *info;
931 944
932 /* FIXME: Use dynamic device id if this is not set. */ 945 /* FIXME: Use dynamic device id if this is not set. */
933 err = xenbus_scanf(XBT_NIL, dev->nodename, 946 err = xenbus_scanf(XBT_NIL, dev->nodename,
934 "virtual-device", "%i", &vdevice); 947 "virtual-device", "%i", &vdevice);
935 if (err != 1) { 948 if (err != 1) {
936 /* go looking in the extended area instead */ 949 /* go looking in the extended area instead */
937 err = xenbus_scanf(XBT_NIL, dev->nodename, "virtual-device-ext", 950 err = xenbus_scanf(XBT_NIL, dev->nodename, "virtual-device-ext",
938 "%i", &vdevice); 951 "%i", &vdevice);
939 if (err != 1) { 952 if (err != 1) {
940 xenbus_dev_fatal(dev, err, "reading virtual-device"); 953 xenbus_dev_fatal(dev, err, "reading virtual-device");
941 return err; 954 return err;
942 } 955 }
943 } 956 }
944 957
945 if (xen_hvm_domain()) { 958 if (xen_hvm_domain()) {
946 char *type; 959 char *type;
947 int len; 960 int len;
948 /* no unplug has been done: do not hook devices != xen vbds */ 961 /* no unplug has been done: do not hook devices != xen vbds */
949 if (xen_platform_pci_unplug & XEN_UNPLUG_UNNECESSARY) { 962 if (xen_platform_pci_unplug & XEN_UNPLUG_UNNECESSARY) {
950 int major; 963 int major;
951 964
952 if (!VDEV_IS_EXTENDED(vdevice)) 965 if (!VDEV_IS_EXTENDED(vdevice))
953 major = BLKIF_MAJOR(vdevice); 966 major = BLKIF_MAJOR(vdevice);
954 else 967 else
955 major = XENVBD_MAJOR; 968 major = XENVBD_MAJOR;
956 969
957 if (major != XENVBD_MAJOR) { 970 if (major != XENVBD_MAJOR) {
958 printk(KERN_INFO 971 printk(KERN_INFO
959 "%s: HVM does not support vbd %d as xen block device\n", 972 "%s: HVM does not support vbd %d as xen block device\n",
960 __FUNCTION__, vdevice); 973 __FUNCTION__, vdevice);
961 return -ENODEV; 974 return -ENODEV;
962 } 975 }
963 } 976 }
964 /* do not create a PV cdrom device if we are an HVM guest */ 977 /* do not create a PV cdrom device if we are an HVM guest */
965 type = xenbus_read(XBT_NIL, dev->nodename, "device-type", &len); 978 type = xenbus_read(XBT_NIL, dev->nodename, "device-type", &len);
966 if (IS_ERR(type)) 979 if (IS_ERR(type))
967 return -ENODEV; 980 return -ENODEV;
968 if (strncmp(type, "cdrom", 5) == 0) { 981 if (strncmp(type, "cdrom", 5) == 0) {
969 kfree(type); 982 kfree(type);
970 return -ENODEV; 983 return -ENODEV;
971 } 984 }
972 kfree(type); 985 kfree(type);
973 } 986 }
974 info = kzalloc(sizeof(*info), GFP_KERNEL); 987 info = kzalloc(sizeof(*info), GFP_KERNEL);
975 if (!info) { 988 if (!info) {
976 xenbus_dev_fatal(dev, -ENOMEM, "allocating info structure"); 989 xenbus_dev_fatal(dev, -ENOMEM, "allocating info structure");
977 return -ENOMEM; 990 return -ENOMEM;
978 } 991 }
979 992
980 mutex_init(&info->mutex); 993 mutex_init(&info->mutex);
981 info->xbdev = dev; 994 info->xbdev = dev;
982 info->vdevice = vdevice; 995 info->vdevice = vdevice;
983 info->connected = BLKIF_STATE_DISCONNECTED; 996 info->connected = BLKIF_STATE_DISCONNECTED;
984 INIT_WORK(&info->work, blkif_restart_queue); 997 INIT_WORK(&info->work, blkif_restart_queue);
985 998
986 for (i = 0; i < BLK_RING_SIZE; i++) 999 for (i = 0; i < BLK_RING_SIZE; i++)
987 info->shadow[i].req.id = i+1; 1000 info->shadow[i].req.u.rw.id = i+1;
988 info->shadow[BLK_RING_SIZE-1].req.id = 0x0fffffff; 1001 info->shadow[BLK_RING_SIZE-1].req.u.rw.id = 0x0fffffff;
989 1002
990 /* Front end dir is a number, which is used as the id. */ 1003 /* Front end dir is a number, which is used as the id. */
991 info->handle = simple_strtoul(strrchr(dev->nodename, '/')+1, NULL, 0); 1004 info->handle = simple_strtoul(strrchr(dev->nodename, '/')+1, NULL, 0);
992 dev_set_drvdata(&dev->dev, info); 1005 dev_set_drvdata(&dev->dev, info);
993 1006
994 err = talk_to_blkback(dev, info); 1007 err = talk_to_blkback(dev, info);
995 if (err) { 1008 if (err) {
996 kfree(info); 1009 kfree(info);
997 dev_set_drvdata(&dev->dev, NULL); 1010 dev_set_drvdata(&dev->dev, NULL);
998 return err; 1011 return err;
999 } 1012 }
1000 1013
1001 return 0; 1014 return 0;
1002 } 1015 }
1003 1016
1004 1017
1005 static int blkif_recover(struct blkfront_info *info) 1018 static int blkif_recover(struct blkfront_info *info)
1006 { 1019 {
1007 int i; 1020 int i;
1008 struct blkif_request *req; 1021 struct blkif_request *req;
1009 struct blk_shadow *copy; 1022 struct blk_shadow *copy;
1010 int j; 1023 int j;
1011 1024
1012 /* Stage 1: Make a safe copy of the shadow state. */ 1025 /* Stage 1: Make a safe copy of the shadow state. */
1013 copy = kmalloc(sizeof(info->shadow), 1026 copy = kmalloc(sizeof(info->shadow),
1014 GFP_NOIO | __GFP_REPEAT | __GFP_HIGH); 1027 GFP_NOIO | __GFP_REPEAT | __GFP_HIGH);
1015 if (!copy) 1028 if (!copy)
1016 return -ENOMEM; 1029 return -ENOMEM;
1017 memcpy(copy, info->shadow, sizeof(info->shadow)); 1030 memcpy(copy, info->shadow, sizeof(info->shadow));
1018 1031
1019 /* Stage 2: Set up free list. */ 1032 /* Stage 2: Set up free list. */
1020 memset(&info->shadow, 0, sizeof(info->shadow)); 1033 memset(&info->shadow, 0, sizeof(info->shadow));
1021 for (i = 0; i < BLK_RING_SIZE; i++) 1034 for (i = 0; i < BLK_RING_SIZE; i++)
1022 info->shadow[i].req.id = i+1; 1035 info->shadow[i].req.u.rw.id = i+1;
1023 info->shadow_free = info->ring.req_prod_pvt; 1036 info->shadow_free = info->ring.req_prod_pvt;
1024 info->shadow[BLK_RING_SIZE-1].req.id = 0x0fffffff; 1037 info->shadow[BLK_RING_SIZE-1].req.u.rw.id = 0x0fffffff;
1025 1038
1026 /* Stage 3: Find pending requests and requeue them. */ 1039 /* Stage 3: Find pending requests and requeue them. */
1027 for (i = 0; i < BLK_RING_SIZE; i++) { 1040 for (i = 0; i < BLK_RING_SIZE; i++) {
1028 /* Not in use? */ 1041 /* Not in use? */
1029 if (!copy[i].request) 1042 if (!copy[i].request)
1030 continue; 1043 continue;
1031 1044
1032 /* Grab a request slot and copy shadow state into it. */ 1045 /* Grab a request slot and copy shadow state into it. */
1033 req = RING_GET_REQUEST(&info->ring, info->ring.req_prod_pvt); 1046 req = RING_GET_REQUEST(&info->ring, info->ring.req_prod_pvt);
1034 *req = copy[i].req; 1047 *req = copy[i].req;
1035 1048
1036 /* We get a new request id, and must reset the shadow state. */ 1049 /* We get a new request id, and must reset the shadow state. */
1037 req->id = get_id_from_freelist(info); 1050 req->u.rw.id = get_id_from_freelist(info);
1038 memcpy(&info->shadow[req->id], &copy[i], sizeof(copy[i])); 1051 memcpy(&info->shadow[req->u.rw.id], &copy[i], sizeof(copy[i]));
1039 1052
1053 if (req->operation != BLKIF_OP_DISCARD) {
1040 /* Rewrite any grant references invalidated by susp/resume. */ 1054 /* Rewrite any grant references invalidated by susp/resume. */
1041 for (j = 0; j < req->nr_segments; j++) 1055 for (j = 0; j < req->u.rw.nr_segments; j++)
1042 gnttab_grant_foreign_access_ref( 1056 gnttab_grant_foreign_access_ref(
1043 req->u.rw.seg[j].gref, 1057 req->u.rw.seg[j].gref,
1044 info->xbdev->otherend_id, 1058 info->xbdev->otherend_id,
1045 pfn_to_mfn(info->shadow[req->id].frame[j]), 1059 pfn_to_mfn(info->shadow[req->u.rw.id].frame[j]),
1046 rq_data_dir(info->shadow[req->id].request)); 1060 rq_data_dir(info->shadow[req->u.rw.id].request));
1047 info->shadow[req->id].req = *req; 1061 }
1062 info->shadow[req->u.rw.id].req = *req;
1048 1063
1049 info->ring.req_prod_pvt++; 1064 info->ring.req_prod_pvt++;
1050 } 1065 }
1051 1066
1052 kfree(copy); 1067 kfree(copy);
1053 1068
1054 xenbus_switch_state(info->xbdev, XenbusStateConnected); 1069 xenbus_switch_state(info->xbdev, XenbusStateConnected);
1055 1070
1056 spin_lock_irq(&blkif_io_lock); 1071 spin_lock_irq(&blkif_io_lock);
1057 1072
1058 /* Now safe for us to use the shared ring */ 1073 /* Now safe for us to use the shared ring */
1059 info->connected = BLKIF_STATE_CONNECTED; 1074 info->connected = BLKIF_STATE_CONNECTED;
1060 1075
1061 /* Send off requeued requests */ 1076 /* Send off requeued requests */
1062 flush_requests(info); 1077 flush_requests(info);
1063 1078
1064 /* Kick any other new requests queued since we resumed */ 1079 /* Kick any other new requests queued since we resumed */
1065 kick_pending_request_queues(info); 1080 kick_pending_request_queues(info);
1066 1081
1067 spin_unlock_irq(&blkif_io_lock); 1082 spin_unlock_irq(&blkif_io_lock);
1068 1083
1069 return 0; 1084 return 0;
1070 } 1085 }
1071 1086
1072 /** 1087 /**
1073 * We are reconnecting to the backend, due to a suspend/resume, or a backend 1088 * We are reconnecting to the backend, due to a suspend/resume, or a backend
1074 * driver restart. We tear down our blkif structure and recreate it, but 1089 * driver restart. We tear down our blkif structure and recreate it, but
1075 * leave the device-layer structures intact so that this is transparent to the 1090 * leave the device-layer structures intact so that this is transparent to the
1076 * rest of the kernel. 1091 * rest of the kernel.
1077 */ 1092 */
1078 static int blkfront_resume(struct xenbus_device *dev) 1093 static int blkfront_resume(struct xenbus_device *dev)
1079 { 1094 {
1080 struct blkfront_info *info = dev_get_drvdata(&dev->dev); 1095 struct blkfront_info *info = dev_get_drvdata(&dev->dev);
1081 int err; 1096 int err;
1082 1097
1083 dev_dbg(&dev->dev, "blkfront_resume: %s\n", dev->nodename); 1098 dev_dbg(&dev->dev, "blkfront_resume: %s\n", dev->nodename);
1084 1099
1085 blkif_free(info, info->connected == BLKIF_STATE_CONNECTED); 1100 blkif_free(info, info->connected == BLKIF_STATE_CONNECTED);
1086 1101
1087 err = talk_to_blkback(dev, info); 1102 err = talk_to_blkback(dev, info);
1088 if (info->connected == BLKIF_STATE_SUSPENDED && !err) 1103 if (info->connected == BLKIF_STATE_SUSPENDED && !err)
1089 err = blkif_recover(info); 1104 err = blkif_recover(info);
1090 1105
1091 return err; 1106 return err;
1092 } 1107 }
1093 1108
1094 static void 1109 static void
1095 blkfront_closing(struct blkfront_info *info) 1110 blkfront_closing(struct blkfront_info *info)
1096 { 1111 {
1097 struct xenbus_device *xbdev = info->xbdev; 1112 struct xenbus_device *xbdev = info->xbdev;
1098 struct block_device *bdev = NULL; 1113 struct block_device *bdev = NULL;
1099 1114
1100 mutex_lock(&info->mutex); 1115 mutex_lock(&info->mutex);
1101 1116
1102 if (xbdev->state == XenbusStateClosing) { 1117 if (xbdev->state == XenbusStateClosing) {
1103 mutex_unlock(&info->mutex); 1118 mutex_unlock(&info->mutex);
1104 return; 1119 return;
1105 } 1120 }
1106 1121
1107 if (info->gd) 1122 if (info->gd)
1108 bdev = bdget_disk(info->gd, 0); 1123 bdev = bdget_disk(info->gd, 0);
1109 1124
1110 mutex_unlock(&info->mutex); 1125 mutex_unlock(&info->mutex);
1111 1126
1112 if (!bdev) { 1127 if (!bdev) {
1113 xenbus_frontend_closed(xbdev); 1128 xenbus_frontend_closed(xbdev);
1114 return; 1129 return;
1115 } 1130 }
1116 1131
1117 mutex_lock(&bdev->bd_mutex); 1132 mutex_lock(&bdev->bd_mutex);
1118 1133
1119 if (bdev->bd_openers) { 1134 if (bdev->bd_openers) {
1120 xenbus_dev_error(xbdev, -EBUSY, 1135 xenbus_dev_error(xbdev, -EBUSY,
1121 "Device in use; refusing to close"); 1136 "Device in use; refusing to close");
1122 xenbus_switch_state(xbdev, XenbusStateClosing); 1137 xenbus_switch_state(xbdev, XenbusStateClosing);
1123 } else { 1138 } else {
1124 xlvbd_release_gendisk(info); 1139 xlvbd_release_gendisk(info);
1125 xenbus_frontend_closed(xbdev); 1140 xenbus_frontend_closed(xbdev);
1126 } 1141 }
1127 1142
1128 mutex_unlock(&bdev->bd_mutex); 1143 mutex_unlock(&bdev->bd_mutex);
1129 bdput(bdev); 1144 bdput(bdev);
1130 } 1145 }
1131 1146
1132 static void blkfront_setup_discard(struct blkfront_info *info) 1147 static void blkfront_setup_discard(struct blkfront_info *info)
1133 { 1148 {
1134 int err; 1149 int err;
1135 char *type; 1150 char *type;
1136 unsigned int discard_granularity; 1151 unsigned int discard_granularity;
1137 unsigned int discard_alignment; 1152 unsigned int discard_alignment;
1153 unsigned int discard_secure;
1138 1154
1139 type = xenbus_read(XBT_NIL, info->xbdev->otherend, "type", NULL); 1155 type = xenbus_read(XBT_NIL, info->xbdev->otherend, "type", NULL);
1140 if (IS_ERR(type)) 1156 if (IS_ERR(type))
1141 return; 1157 return;
1142 1158
1159 info->feature_secdiscard = 0;
1143 if (strncmp(type, "phy", 3) == 0) { 1160 if (strncmp(type, "phy", 3) == 0) {
1144 err = xenbus_gather(XBT_NIL, info->xbdev->otherend, 1161 err = xenbus_gather(XBT_NIL, info->xbdev->otherend,
1145 "discard-granularity", "%u", &discard_granularity, 1162 "discard-granularity", "%u", &discard_granularity,
1146 "discard-alignment", "%u", &discard_alignment, 1163 "discard-alignment", "%u", &discard_alignment,
1147 NULL); 1164 NULL);
1148 if (!err) { 1165 if (!err) {
1149 info->feature_discard = 1; 1166 info->feature_discard = 1;
1150 info->discard_granularity = discard_granularity; 1167 info->discard_granularity = discard_granularity;
1151 info->discard_alignment = discard_alignment; 1168 info->discard_alignment = discard_alignment;
1152 } 1169 }
1170 err = xenbus_gather(XBT_NIL, info->xbdev->otherend,
1171 "discard-secure", "%d", &discard_secure,
1172 NULL);
1173 if (!err)
1174 info->feature_secdiscard = discard_secure;
1175
1153 } else if (strncmp(type, "file", 4) == 0) 1176 } else if (strncmp(type, "file", 4) == 0)
1154 info->feature_discard = 1; 1177 info->feature_discard = 1;
1155 1178
1156 kfree(type); 1179 kfree(type);
1157 } 1180 }
1158 1181
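blkfront_setup_discard() above only reads what the backend advertises in xenstore. A sketch of the nodes involved (paths and values are made-up examples; the real directory is info->xbdev->otherend):

/* <otherend>/type                = "phy"   (or "file")
 * <otherend>/discard-granularity = "4096"
 * <otherend>/discard-alignment   = "0"
 * <otherend>/discard-secure      = "1"     (sets info->feature_secdiscard)
 * <otherend>/feature-discard     = "1"     (checked in blkfront_connect()) */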
1159 /* 1182 /*
1160 * Invoked when the backend is finally 'ready' (and has produced 1183 * Invoked when the backend is finally 'ready' (and has produced
1161 * the details about the physical device - #sectors, size, etc). 1184 * the details about the physical device - #sectors, size, etc).
1162 */ 1185 */
1163 static void blkfront_connect(struct blkfront_info *info) 1186 static void blkfront_connect(struct blkfront_info *info)
1164 { 1187 {
1165 unsigned long long sectors; 1188 unsigned long long sectors;
1166 unsigned long sector_size; 1189 unsigned long sector_size;
1167 unsigned int binfo; 1190 unsigned int binfo;
1168 int err; 1191 int err;
1169 int barrier, flush, discard; 1192 int barrier, flush, discard;
1170 1193
1171 switch (info->connected) { 1194 switch (info->connected) {
1172 case BLKIF_STATE_CONNECTED: 1195 case BLKIF_STATE_CONNECTED:
1173 /* 1196 /*
1174 * Potentially, the back-end may be signalling 1197 * Potentially, the back-end may be signalling
1175 * a capacity change; update the capacity. 1198 * a capacity change; update the capacity.
1176 */ 1199 */
1177 err = xenbus_scanf(XBT_NIL, info->xbdev->otherend, 1200 err = xenbus_scanf(XBT_NIL, info->xbdev->otherend,
1178 "sectors", "%Lu", &sectors); 1201 "sectors", "%Lu", &sectors);
1179 if (XENBUS_EXIST_ERR(err)) 1202 if (XENBUS_EXIST_ERR(err))
1180 return; 1203 return;
1181 printk(KERN_INFO "Setting capacity to %Lu\n", 1204 printk(KERN_INFO "Setting capacity to %Lu\n",
1182 sectors); 1205 sectors);
1183 set_capacity(info->gd, sectors); 1206 set_capacity(info->gd, sectors);
1184 revalidate_disk(info->gd); 1207 revalidate_disk(info->gd);
1185 1208
1186 /* fall through */ 1209 /* fall through */
1187 case BLKIF_STATE_SUSPENDED: 1210 case BLKIF_STATE_SUSPENDED:
1188 return; 1211 return;
1189 1212
1190 default: 1213 default:
1191 break; 1214 break;
1192 } 1215 }
1193 1216
1194 dev_dbg(&info->xbdev->dev, "%s:%s.\n", 1217 dev_dbg(&info->xbdev->dev, "%s:%s.\n",
1195 __func__, info->xbdev->otherend); 1218 __func__, info->xbdev->otherend);
1196 1219
1197 err = xenbus_gather(XBT_NIL, info->xbdev->otherend, 1220 err = xenbus_gather(XBT_NIL, info->xbdev->otherend,
1198 "sectors", "%llu", &sectors, 1221 "sectors", "%llu", &sectors,
1199 "info", "%u", &binfo, 1222 "info", "%u", &binfo,
1200 "sector-size", "%lu", &sector_size, 1223 "sector-size", "%lu", &sector_size,
1201 NULL); 1224 NULL);
1202 if (err) { 1225 if (err) {
1203 xenbus_dev_fatal(info->xbdev, err, 1226 xenbus_dev_fatal(info->xbdev, err,
1204 "reading backend fields at %s", 1227 "reading backend fields at %s",
1205 info->xbdev->otherend); 1228 info->xbdev->otherend);
1206 return; 1229 return;
1207 } 1230 }
1208 1231
1209 info->feature_flush = 0; 1232 info->feature_flush = 0;
1210 info->flush_op = 0; 1233 info->flush_op = 0;
1211 1234
1212 err = xenbus_gather(XBT_NIL, info->xbdev->otherend, 1235 err = xenbus_gather(XBT_NIL, info->xbdev->otherend,
1213 "feature-barrier", "%d", &barrier, 1236 "feature-barrier", "%d", &barrier,
1214 NULL); 1237 NULL);
1215 1238
1216 /* 1239 /*
1217 * If there's no "feature-barrier" defined, then it means 1240 * If there's no "feature-barrier" defined, then it means
1218 * we're dealing with a very old backend which writes 1241 * we're dealing with a very old backend which writes
1219 * synchronously; nothing to do. 1242 * synchronously; nothing to do.
1220 * 1243 *
1221 * If there are barriers, then we use flush. 1244 * If there are barriers, then we use flush.
1222 */ 1245 */
1223 if (!err && barrier) { 1246 if (!err && barrier) {
1224 info->feature_flush = REQ_FLUSH | REQ_FUA; 1247 info->feature_flush = REQ_FLUSH | REQ_FUA;
1225 info->flush_op = BLKIF_OP_WRITE_BARRIER; 1248 info->flush_op = BLKIF_OP_WRITE_BARRIER;
1226 } 1249 }
1227 /* 1250 /*
1228  * And if "feature-flush-cache" is present, use that in 1251  * And if "feature-flush-cache" is present, use that in
1229  * preference to barriers. 1252  * preference to barriers.
1230 */ 1253 */
1231 err = xenbus_gather(XBT_NIL, info->xbdev->otherend, 1254 err = xenbus_gather(XBT_NIL, info->xbdev->otherend,
1232 "feature-flush-cache", "%d", &flush, 1255 "feature-flush-cache", "%d", &flush,
1233 NULL); 1256 NULL);
1234 1257
1235 if (!err && flush) { 1258 if (!err && flush) {
1236 info->feature_flush = REQ_FLUSH; 1259 info->feature_flush = REQ_FLUSH;
1237 info->flush_op = BLKIF_OP_FLUSH_DISKCACHE; 1260 info->flush_op = BLKIF_OP_FLUSH_DISKCACHE;
1238 } 1261 }
1239 1262
1240 err = xenbus_gather(XBT_NIL, info->xbdev->otherend, 1263 err = xenbus_gather(XBT_NIL, info->xbdev->otherend,
1241 "feature-discard", "%d", &discard, 1264 "feature-discard", "%d", &discard,
1242 NULL); 1265 NULL);
1243 1266
1244 if (!err && discard) 1267 if (!err && discard)
1245 blkfront_setup_discard(info); 1268 blkfront_setup_discard(info);
1246 1269
1247 err = xlvbd_alloc_gendisk(sectors, info, binfo, sector_size); 1270 err = xlvbd_alloc_gendisk(sectors, info, binfo, sector_size);
1248 if (err) { 1271 if (err) {
1249 xenbus_dev_fatal(info->xbdev, err, "xlvbd_add at %s", 1272 xenbus_dev_fatal(info->xbdev, err, "xlvbd_add at %s",
1250 info->xbdev->otherend); 1273 info->xbdev->otherend);
1251 return; 1274 return;
1252 } 1275 }
1253 1276
1254 xenbus_switch_state(info->xbdev, XenbusStateConnected); 1277 xenbus_switch_state(info->xbdev, XenbusStateConnected);
1255 1278
1256 /* Kick pending requests. */ 1279 /* Kick pending requests. */
1257 spin_lock_irq(&blkif_io_lock); 1280 spin_lock_irq(&blkif_io_lock);
1258 info->connected = BLKIF_STATE_CONNECTED; 1281 info->connected = BLKIF_STATE_CONNECTED;
1259 kick_pending_request_queues(info); 1282 kick_pending_request_queues(info);
1260 spin_unlock_irq(&blkif_io_lock); 1283 spin_unlock_irq(&blkif_io_lock);
1261 1284
1262 add_disk(info->gd); 1285 add_disk(info->gd);
1263 1286
1264 info->is_ready = 1; 1287 info->is_ready = 1;
1265 } 1288 }
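The feature_flush/flush_op pair chosen above is what the request path consults when it turns REQ_FLUSH/REQ_FUA writes into barrier or flush operations; propagating it to the block layer is a single call with the 3.3-era API. A hedged sketch, assuming blkfront_info keeps its request queue in info->rq:

	/* Illustrative only: tell the block layer which flush/FUA modes the
	 * negotiated backend features allow (0, REQ_FLUSH, or REQ_FLUSH|REQ_FUA). */
	static void xlvbd_apply_flush(struct blkfront_info *info)
	{
		blk_queue_flush(info->rq, info->feature_flush);
	}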
1266 1289
1267 /** 1290 /**
1268 * Callback received when the backend's state changes. 1291 * Callback received when the backend's state changes.
1269 */ 1292 */
1270 static void blkback_changed(struct xenbus_device *dev, 1293 static void blkback_changed(struct xenbus_device *dev,
1271 enum xenbus_state backend_state) 1294 enum xenbus_state backend_state)
1272 { 1295 {
1273 struct blkfront_info *info = dev_get_drvdata(&dev->dev); 1296 struct blkfront_info *info = dev_get_drvdata(&dev->dev);
1274 1297
1275 dev_dbg(&dev->dev, "blkfront:blkback_changed to state %d.\n", backend_state); 1298 dev_dbg(&dev->dev, "blkfront:blkback_changed to state %d.\n", backend_state);
1276 1299
1277 switch (backend_state) { 1300 switch (backend_state) {
1278 case XenbusStateInitialising: 1301 case XenbusStateInitialising:
1279 case XenbusStateInitWait: 1302 case XenbusStateInitWait:
1280 case XenbusStateInitialised: 1303 case XenbusStateInitialised:
1281 case XenbusStateReconfiguring: 1304 case XenbusStateReconfiguring:
1282 case XenbusStateReconfigured: 1305 case XenbusStateReconfigured:
1283 case XenbusStateUnknown: 1306 case XenbusStateUnknown:
1284 case XenbusStateClosed: 1307 case XenbusStateClosed:
1285 break; 1308 break;
1286 1309
1287 case XenbusStateConnected: 1310 case XenbusStateConnected:
1288 blkfront_connect(info); 1311 blkfront_connect(info);
1289 break; 1312 break;
1290 1313
1291 case XenbusStateClosing: 1314 case XenbusStateClosing:
1292 blkfront_closing(info); 1315 blkfront_closing(info);
1293 break; 1316 break;
1294 } 1317 }
1295 } 1318 }
1296 1319
1297 static int blkfront_remove(struct xenbus_device *xbdev) 1320 static int blkfront_remove(struct xenbus_device *xbdev)
1298 { 1321 {
1299 struct blkfront_info *info = dev_get_drvdata(&xbdev->dev); 1322 struct blkfront_info *info = dev_get_drvdata(&xbdev->dev);
1300 struct block_device *bdev = NULL; 1323 struct block_device *bdev = NULL;
1301 struct gendisk *disk; 1324 struct gendisk *disk;
1302 1325
1303 dev_dbg(&xbdev->dev, "%s removed", xbdev->nodename); 1326 dev_dbg(&xbdev->dev, "%s removed", xbdev->nodename);
1304 1327
1305 blkif_free(info, 0); 1328 blkif_free(info, 0);
1306 1329
1307 mutex_lock(&info->mutex); 1330 mutex_lock(&info->mutex);
1308 1331
1309 disk = info->gd; 1332 disk = info->gd;
1310 if (disk) 1333 if (disk)
1311 bdev = bdget_disk(disk, 0); 1334 bdev = bdget_disk(disk, 0);
1312 1335
1313 info->xbdev = NULL; 1336 info->xbdev = NULL;
1314 mutex_unlock(&info->mutex); 1337 mutex_unlock(&info->mutex);
1315 1338
1316 if (!bdev) { 1339 if (!bdev) {
1317 kfree(info); 1340 kfree(info);
1318 return 0; 1341 return 0;
1319 } 1342 }
1320 1343
1321 /* 1344 /*
1322 * The xbdev was removed before we reached the Closed 1345 * The xbdev was removed before we reached the Closed
1323 * state. See if it's safe to remove the disk. If the bdev 1346 * state. See if it's safe to remove the disk. If the bdev
1324 * isn't closed yet, we let release take care of it. 1347 * isn't closed yet, we let release take care of it.
1325 */ 1348 */
1326 1349
1327 mutex_lock(&bdev->bd_mutex); 1350 mutex_lock(&bdev->bd_mutex);
1328 info = disk->private_data; 1351 info = disk->private_data;
1329 1352
1330 dev_warn(disk_to_dev(disk), 1353 dev_warn(disk_to_dev(disk),
1331 "%s was hot-unplugged, %d stale handles\n", 1354 "%s was hot-unplugged, %d stale handles\n",
1332 xbdev->nodename, bdev->bd_openers); 1355 xbdev->nodename, bdev->bd_openers);
1333 1356
1334 if (info && !bdev->bd_openers) { 1357 if (info && !bdev->bd_openers) {
1335 xlvbd_release_gendisk(info); 1358 xlvbd_release_gendisk(info);
1336 disk->private_data = NULL; 1359 disk->private_data = NULL;
1337 kfree(info); 1360 kfree(info);
1338 } 1361 }
1339 1362
1340 mutex_unlock(&bdev->bd_mutex); 1363 mutex_unlock(&bdev->bd_mutex);
1341 bdput(bdev); 1364 bdput(bdev);
1342 1365
1343 return 0; 1366 return 0;
1344 } 1367 }
1345 1368
1346 static int blkfront_is_ready(struct xenbus_device *dev) 1369 static int blkfront_is_ready(struct xenbus_device *dev)
1347 { 1370 {
1348 struct blkfront_info *info = dev_get_drvdata(&dev->dev); 1371 struct blkfront_info *info = dev_get_drvdata(&dev->dev);
1349 1372
1350 return info->is_ready && info->xbdev; 1373 return info->is_ready && info->xbdev;
1351 } 1374 }
1352 1375
1353 static int blkif_open(struct block_device *bdev, fmode_t mode) 1376 static int blkif_open(struct block_device *bdev, fmode_t mode)
1354 { 1377 {
1355 struct gendisk *disk = bdev->bd_disk; 1378 struct gendisk *disk = bdev->bd_disk;
1356 struct blkfront_info *info; 1379 struct blkfront_info *info;
1357 int err = 0; 1380 int err = 0;
1358 1381
1359 mutex_lock(&blkfront_mutex); 1382 mutex_lock(&blkfront_mutex);
1360 1383
1361 info = disk->private_data; 1384 info = disk->private_data;
1362 if (!info) { 1385 if (!info) {
1363 /* xbdev gone */ 1386 /* xbdev gone */
1364 err = -ERESTARTSYS; 1387 err = -ERESTARTSYS;
1365 goto out; 1388 goto out;
1366 } 1389 }
1367 1390
1368 mutex_lock(&info->mutex); 1391 mutex_lock(&info->mutex);
1369 1392
1370 if (!info->gd) 1393 if (!info->gd)
1371 /* xbdev is closed */ 1394 /* xbdev is closed */
1372 err = -ERESTARTSYS; 1395 err = -ERESTARTSYS;
1373 1396
1374 mutex_unlock(&info->mutex); 1397 mutex_unlock(&info->mutex);
1375 1398
1376 out: 1399 out:
1377 mutex_unlock(&blkfront_mutex); 1400 mutex_unlock(&blkfront_mutex);
1378 return err; 1401 return err;
1379 } 1402 }
1380 1403
1381 static int blkif_release(struct gendisk *disk, fmode_t mode) 1404 static int blkif_release(struct gendisk *disk, fmode_t mode)
1382 { 1405 {
1383 struct blkfront_info *info = disk->private_data; 1406 struct blkfront_info *info = disk->private_data;
1384 struct block_device *bdev; 1407 struct block_device *bdev;
1385 struct xenbus_device *xbdev; 1408 struct xenbus_device *xbdev;
1386 1409
1387 mutex_lock(&blkfront_mutex); 1410 mutex_lock(&blkfront_mutex);
1388 1411
1389 bdev = bdget_disk(disk, 0); 1412 bdev = bdget_disk(disk, 0);
1390 bdput(bdev); 1413 bdput(bdev);
1391 1414
1392 if (bdev->bd_openers) 1415 if (bdev->bd_openers)
1393 goto out; 1416 goto out;
1394 1417
1395 /* 1418 /*
1396 * Check if we have been instructed to close. We will have 1419 * Check if we have been instructed to close. We will have
1397 * deferred this request, because the bdev was still open. 1420 * deferred this request, because the bdev was still open.
1398 */ 1421 */
1399 1422
1400 mutex_lock(&info->mutex); 1423 mutex_lock(&info->mutex);
1401 xbdev = info->xbdev; 1424 xbdev = info->xbdev;
1402 1425
1403 if (xbdev && xbdev->state == XenbusStateClosing) { 1426 if (xbdev && xbdev->state == XenbusStateClosing) {
1404 /* pending switch to state closed */ 1427 /* pending switch to state closed */
1405 dev_info(disk_to_dev(bdev->bd_disk), "releasing disk\n"); 1428 dev_info(disk_to_dev(bdev->bd_disk), "releasing disk\n");
1406 xlvbd_release_gendisk(info); 1429 xlvbd_release_gendisk(info);
1407 xenbus_frontend_closed(info->xbdev); 1430 xenbus_frontend_closed(info->xbdev);
1408 } 1431 }
1409 1432
1410 mutex_unlock(&info->mutex); 1433 mutex_unlock(&info->mutex);
1411 1434
1412 if (!xbdev) { 1435 if (!xbdev) {
1413 /* sudden device removal */ 1436 /* sudden device removal */
1414 dev_info(disk_to_dev(bdev->bd_disk), "releasing disk\n"); 1437 dev_info(disk_to_dev(bdev->bd_disk), "releasing disk\n");
1415 xlvbd_release_gendisk(info); 1438 xlvbd_release_gendisk(info);
1416 disk->private_data = NULL; 1439 disk->private_data = NULL;
1417 kfree(info); 1440 kfree(info);
1418 } 1441 }
1419 1442
1420 out: 1443 out:
1421 mutex_unlock(&blkfront_mutex); 1444 mutex_unlock(&blkfront_mutex);
1422 return 0; 1445 return 0;
1423 } 1446 }
1424 1447
1425 static const struct block_device_operations xlvbd_block_fops = 1448 static const struct block_device_operations xlvbd_block_fops =
1426 { 1449 {
1427 .owner = THIS_MODULE, 1450 .owner = THIS_MODULE,
1428 .open = blkif_open, 1451 .open = blkif_open,
1429 .release = blkif_release, 1452 .release = blkif_release,
1430 .getgeo = blkif_getgeo, 1453 .getgeo = blkif_getgeo,
1431 .ioctl = blkif_ioctl, 1454 .ioctl = blkif_ioctl,
1432 }; 1455 };
1433 1456
1434 1457
1435 static const struct xenbus_device_id blkfront_ids[] = { 1458 static const struct xenbus_device_id blkfront_ids[] = {
1436 { "vbd" }, 1459 { "vbd" },
1437 { "" } 1460 { "" }
1438 }; 1461 };
1439 1462
1440 static DEFINE_XENBUS_DRIVER(blkfront, , 1463 static DEFINE_XENBUS_DRIVER(blkfront, ,
1441 .probe = blkfront_probe, 1464 .probe = blkfront_probe,
1442 .remove = blkfront_remove, 1465 .remove = blkfront_remove,
1443 .resume = blkfront_resume, 1466 .resume = blkfront_resume,
1444 .otherend_changed = blkback_changed, 1467 .otherend_changed = blkback_changed,
1445 .is_ready = blkfront_is_ready, 1468 .is_ready = blkfront_is_ready,
1446 ); 1469 );
1447 1470
1448 static int __init xlblk_init(void) 1471 static int __init xlblk_init(void)
1449 { 1472 {
1450 int ret; 1473 int ret;
1451 1474
1452 if (!xen_domain()) 1475 if (!xen_domain())
1453 return -ENODEV; 1476 return -ENODEV;
1454 1477
1455 if (register_blkdev(XENVBD_MAJOR, DEV_NAME)) { 1478 if (register_blkdev(XENVBD_MAJOR, DEV_NAME)) {
1456 printk(KERN_WARNING "xen_blk: can't get major %d with name %s\n", 1479 printk(KERN_WARNING "xen_blk: can't get major %d with name %s\n",
1457 XENVBD_MAJOR, DEV_NAME); 1480 XENVBD_MAJOR, DEV_NAME);
1458 return -ENODEV; 1481 return -ENODEV;
1459 } 1482 }
1460 1483
1461 ret = xenbus_register_frontend(&blkfront_driver); 1484 ret = xenbus_register_frontend(&blkfront_driver);
1462 if (ret) { 1485 if (ret) {
1463 unregister_blkdev(XENVBD_MAJOR, DEV_NAME); 1486 unregister_blkdev(XENVBD_MAJOR, DEV_NAME);
1464 return ret; 1487 return ret;
1465 } 1488 }
1466 1489
1467 return 0; 1490 return 0;
1468 } 1491 }
1469 module_init(xlblk_init); 1492 module_init(xlblk_init);
1470 1493
1471 1494
1472 static void __exit xlblk_exit(void) 1495 static void __exit xlblk_exit(void)
1473 { 1496 {
1474 return xenbus_unregister_driver(&blkfront_driver); 1497 return xenbus_unregister_driver(&blkfront_driver);
1475 } 1498 }
1476 module_exit(xlblk_exit); 1499 module_exit(xlblk_exit);
1477 1500
1478 MODULE_DESCRIPTION("Xen virtual block device frontend"); 1501 MODULE_DESCRIPTION("Xen virtual block device frontend");
1479 MODULE_LICENSE("GPL"); 1502 MODULE_LICENSE("GPL");
1480 MODULE_ALIAS_BLOCKDEV_MAJOR(XENVBD_MAJOR); 1503 MODULE_ALIAS_BLOCKDEV_MAJOR(XENVBD_MAJOR);
1481 MODULE_ALIAS("xen:vbd"); 1504 MODULE_ALIAS("xen:vbd");
1482 MODULE_ALIAS("xenblk"); 1505 MODULE_ALIAS("xenblk");
include/xen/interface/io/blkif.h
1 /****************************************************************************** 1 /******************************************************************************
2 * blkif.h 2 * blkif.h
3 * 3 *
4 * Unified block-device I/O interface for Xen guest OSes. 4 * Unified block-device I/O interface for Xen guest OSes.
5 * 5 *
6 * Copyright (c) 2003-2004, Keir Fraser 6 * Copyright (c) 2003-2004, Keir Fraser
7 */ 7 */
8 8
9 #ifndef __XEN_PUBLIC_IO_BLKIF_H__ 9 #ifndef __XEN_PUBLIC_IO_BLKIF_H__
10 #define __XEN_PUBLIC_IO_BLKIF_H__ 10 #define __XEN_PUBLIC_IO_BLKIF_H__
11 11
12 #include "ring.h" 12 #include "ring.h"
13 #include "../grant_table.h" 13 #include "../grant_table.h"
14 14
15 /* 15 /*
16 * Front->back notifications: When enqueuing a new request, sending a 16 * Front->back notifications: When enqueuing a new request, sending a
17 * notification can be made conditional on req_event (i.e., the generic 17 * notification can be made conditional on req_event (i.e., the generic
18 * hold-off mechanism provided by the ring macros). Backends must set 18 * hold-off mechanism provided by the ring macros). Backends must set
19 * req_event appropriately (e.g., using RING_FINAL_CHECK_FOR_REQUESTS()). 19 * req_event appropriately (e.g., using RING_FINAL_CHECK_FOR_REQUESTS()).
20 * 20 *
21 * Back->front notifications: When enqueuing a new response, sending a 21 * Back->front notifications: When enqueuing a new response, sending a
22 * notification can be made conditional on rsp_event (i.e., the generic 22 * notification can be made conditional on rsp_event (i.e., the generic
23 * hold-off mechanism provided by the ring macros). Frontends must set 23 * hold-off mechanism provided by the ring macros). Frontends must set
24 * rsp_event appropriately (e.g., using RING_FINAL_CHECK_FOR_RESPONSES()). 24 * rsp_event appropriately (e.g., using RING_FINAL_CHECK_FOR_RESPONSES()).
25 */ 25 */
26 26
27 typedef uint16_t blkif_vdev_t; 27 typedef uint16_t blkif_vdev_t;
28 typedef uint64_t blkif_sector_t; 28 typedef uint64_t blkif_sector_t;
29 29
30 /* 30 /*
31 * REQUEST CODES. 31 * REQUEST CODES.
32 */ 32 */
33 #define BLKIF_OP_READ 0 33 #define BLKIF_OP_READ 0
34 #define BLKIF_OP_WRITE 1 34 #define BLKIF_OP_WRITE 1
35 /* 35 /*
36 * Recognised only if "feature-barrier" is present in backend xenbus info. 36 * Recognised only if "feature-barrier" is present in backend xenbus info.
37 * The "feature_barrier" node contains a boolean indicating whether barrier 37 * The "feature_barrier" node contains a boolean indicating whether barrier
38 * requests are likely to succeed or fail. Either way, a barrier request 38 * requests are likely to succeed or fail. Either way, a barrier request
39 * may fail at any time with BLKIF_RSP_EOPNOTSUPP if it is unsupported by 39 * may fail at any time with BLKIF_RSP_EOPNOTSUPP if it is unsupported by
40 * the underlying block-device hardware. The boolean simply indicates whether 40 * the underlying block-device hardware. The boolean simply indicates whether
41 * or not it is worthwhile for the frontend to attempt barrier requests. 41 * or not it is worthwhile for the frontend to attempt barrier requests.
42 * If a backend does not recognise BLKIF_OP_WRITE_BARRIER, it should *not* 42 * If a backend does not recognise BLKIF_OP_WRITE_BARRIER, it should *not*
43 * create the "feature-barrier" node! 43 * create the "feature-barrier" node!
44 */ 44 */
45 #define BLKIF_OP_WRITE_BARRIER 2 45 #define BLKIF_OP_WRITE_BARRIER 2
46 46
47 /* 47 /*
48 * Recognised if "feature-flush-cache" is present in backend xenbus 48 * Recognised if "feature-flush-cache" is present in backend xenbus
49 * info. A flush will ask the underlying storage hardware to flush its 49 * info. A flush will ask the underlying storage hardware to flush its
50 * non-volatile caches as appropriate. The "feature-flush-cache" node 50 * non-volatile caches as appropriate. The "feature-flush-cache" node
51 * contains a boolean indicating whether flush requests are likely to 51 * contains a boolean indicating whether flush requests are likely to
52 * succeed or fail. Either way, a flush request may fail at any time 52 * succeed or fail. Either way, a flush request may fail at any time
53 * with BLKIF_RSP_EOPNOTSUPP if it is unsupported by the underlying 53 * with BLKIF_RSP_EOPNOTSUPP if it is unsupported by the underlying
54 * block-device hardware. The boolean simply indicates whether or not it 54 * block-device hardware. The boolean simply indicates whether or not it
55 * is worthwhile for the frontend to attempt flushes. If a backend does 55 * is worthwhile for the frontend to attempt flushes. If a backend does
56 * not recognise BLKIF_OP_WRITE_FLUSH_CACHE, it should *not* create the 56 * not recognise BLKIF_OP_WRITE_FLUSH_CACHE, it should *not* create the
57 * "feature-flush-cache" node! 57 * "feature-flush-cache" node!
58 */ 58 */
59 #define BLKIF_OP_FLUSH_DISKCACHE 3 59 #define BLKIF_OP_FLUSH_DISKCACHE 3
60 60
61 /* 61 /*
62 * Recognised only if "feature-discard" is present in backend xenbus info. 62 * Recognised only if "feature-discard" is present in backend xenbus info.
63 * The "feature-discard" node contains a boolean indicating whether trim 63 * The "feature-discard" node contains a boolean indicating whether trim
64  * (ATA) or unmap (SCSI) - conveniently called discard - requests are likely 64  * (ATA) or unmap (SCSI) - conveniently called discard - requests are likely
65 * to succeed or fail. Either way, a discard request 65 * to succeed or fail. Either way, a discard request
66 * may fail at any time with BLKIF_RSP_EOPNOTSUPP if it is unsupported by 66 * may fail at any time with BLKIF_RSP_EOPNOTSUPP if it is unsupported by
67 * the underlying block-device hardware. The boolean simply indicates whether 67 * the underlying block-device hardware. The boolean simply indicates whether
68 * or not it is worthwhile for the frontend to attempt discard requests. 68 * or not it is worthwhile for the frontend to attempt discard requests.
69 * If a backend does not recognise BLKIF_OP_DISCARD, it should *not* 69 * If a backend does not recognise BLKIF_OP_DISCARD, it should *not*
70 * create the "feature-discard" node! 70 * create the "feature-discard" node!
71 * 71 *
72 * Discard operation is a request for the underlying block device to mark 72 * Discard operation is a request for the underlying block device to mark
73 * extents to be erased. However, discard does not guarantee that the blocks 73 * extents to be erased. However, discard does not guarantee that the blocks
74 * will be erased from the device - it is just a hint to the device 74 * will be erased from the device - it is just a hint to the device
75 * controller that these blocks are no longer in use. What the device 75 * controller that these blocks are no longer in use. What the device
76 * controller does with that information is left to the controller. 76 * controller does with that information is left to the controller.
77 * Discard operations are passed with sector_number as the 77 * Discard operations are passed with sector_number as the
78 * sector index to begin discard operations at and nr_sectors as the number of 78 * sector index to begin discard operations at and nr_sectors as the number of
79 * sectors to be discarded. The specified sectors should be discarded if the 79 * sectors to be discarded. The specified sectors should be discarded if the
80 * underlying block device supports trim (ATA) or unmap (SCSI) operations, 80 * underlying block device supports trim (ATA) or unmap (SCSI) operations,
81 * or a BLKIF_RSP_EOPNOTSUPP should be returned. 81 * or a BLKIF_RSP_EOPNOTSUPP should be returned.
82 * More information about trim/unmap operations at: 82 * More information about trim/unmap operations at:
83 * http://t13.org/Documents/UploadedDocuments/docs2008/ 83 * http://t13.org/Documents/UploadedDocuments/docs2008/
84 * e07154r6-Data_Set_Management_Proposal_for_ATA-ACS2.doc 84 * e07154r6-Data_Set_Management_Proposal_for_ATA-ACS2.doc
85 * http://www.seagate.com/staticfiles/support/disc/manuals/ 85 * http://www.seagate.com/staticfiles/support/disc/manuals/
86 * Interface%20manuals/100293068c.pdf 86 * Interface%20manuals/100293068c.pdf
87 * The backend can optionally provide three extra XenBus attributes to
88 * further optimize the discard functionality:
89  * 'discard-alignment' - Devices that support discard functionality may
90 * internally allocate space in units that are bigger than the exported
91 * logical block size. The discard-alignment parameter indicates how many bytes
92 * the beginning of the partition is offset from the internal allocation unit's
93 * natural alignment.
94 * 'discard-granularity' - Devices that support discard functionality may
95 * internally allocate space using units that are bigger than the logical block
96 * size. The discard-granularity parameter indicates the size of the internal
97 * allocation unit in bytes if reported by the device. Otherwise the
98 * discard-granularity will be set to match the device's physical block size.
99 * 'discard-secure' - All copies of the discarded sectors (potentially created
100 * by garbage collection) must also be erased. To use this feature, the flag
101  * BLKIF_DISCARD_SECURE must be set in the blkif_request_discard.
87 */ 102 */
88 #define BLKIF_OP_DISCARD 5 103 #define BLKIF_OP_DISCARD 5
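On the backend side, these nodes are written to the device's xenstore directory while connecting, typically alongside the other feature nodes. A minimal sketch using the standard xenbus_printf() helper; the function name and the granularity/alignment/secure parameters are placeholders for values a backend would take from the underlying device's queue limits, and error handling is omitted:

	/* Sketch: advertise discard support as described in the comment above. */
	static void blkback_advertise_discard(struct xenbus_transaction xbt,
					      struct xenbus_device *dev,
					      unsigned int granularity,
					      unsigned int alignment,
					      int secure)
	{
		xenbus_printf(xbt, dev->nodename, "feature-discard", "%d", 1);
		xenbus_printf(xbt, dev->nodename, "discard-granularity", "%u",
			      granularity);
		xenbus_printf(xbt, dev->nodename, "discard-alignment", "%u",
			      alignment);
		xenbus_printf(xbt, dev->nodename, "discard-secure", "%d",
			      secure ? 1 : 0);
	}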
89 104
90 /* 105 /*
91 * Maximum scatter/gather segments per request. 106 * Maximum scatter/gather segments per request.
92 * This is carefully chosen so that sizeof(struct blkif_ring) <= PAGE_SIZE. 107 * This is carefully chosen so that sizeof(struct blkif_ring) <= PAGE_SIZE.
93 * NB. This could be 12 if the ring indexes weren't stored in the same page. 108 * NB. This could be 12 if the ring indexes weren't stored in the same page.
94 */ 109 */
95 #define BLKIF_MAX_SEGMENTS_PER_REQUEST 11 110 #define BLKIF_MAX_SEGMENTS_PER_REQUEST 11
96 111
97 struct blkif_request_rw { 112 struct blkif_request_rw {
113 uint8_t nr_segments; /* number of segments */
114 blkif_vdev_t handle; /* only for read/write requests */
115 #ifdef CONFIG_X86_64
116 uint32_t _pad1; /* offsetof(blkif_request,u.rw.id) == 8 */
117 #endif
118 uint64_t id; /* private guest value, echoed in resp */
98 blkif_sector_t sector_number;/* start sector idx on disk (r/w only) */ 119 blkif_sector_t sector_number;/* start sector idx on disk (r/w only) */
99 struct blkif_request_segment { 120 struct blkif_request_segment {
100 grant_ref_t gref; /* reference to I/O buffer frame */ 121 grant_ref_t gref; /* reference to I/O buffer frame */
101 /* @first_sect: first sector in frame to transfer (inclusive). */ 122 /* @first_sect: first sector in frame to transfer (inclusive). */
102 /* @last_sect: last sector in frame to transfer (inclusive). */ 123 /* @last_sect: last sector in frame to transfer (inclusive). */
103 uint8_t first_sect, last_sect; 124 uint8_t first_sect, last_sect;
104 } seg[BLKIF_MAX_SEGMENTS_PER_REQUEST]; 125 } seg[BLKIF_MAX_SEGMENTS_PER_REQUEST];
105 }; 126 } __attribute__((__packed__));
106 127
107 struct blkif_request_discard { 128 struct blkif_request_discard {
129 uint8_t flag; /* BLKIF_DISCARD_SECURE or zero. */
130 #define BLKIF_DISCARD_SECURE (1<<0) /* ignored if discard-secure=0 */
131 blkif_vdev_t _pad1; /* only for read/write requests */
132 #ifdef CONFIG_X86_64
133 uint32_t _pad2; /* offsetof(blkif_req..,u.discard.id)==8*/
134 #endif
135 uint64_t id; /* private guest value, echoed in resp */
108 blkif_sector_t sector_number; 136 blkif_sector_t sector_number;
109 uint64_t nr_sectors; 137 uint64_t nr_sectors;
110 }; 138 uint8_t _pad3;
139 } __attribute__((__packed__));
111 140
112 struct blkif_request { 141 struct blkif_request {
113 uint8_t operation; /* BLKIF_OP_??? */ 142 uint8_t operation; /* BLKIF_OP_??? */
114 uint8_t nr_segments; /* number of segments */
115 blkif_vdev_t handle; /* only for read/write requests */
116 uint64_t id; /* private guest value, echoed in resp */
117 union { 143 union {
118 struct blkif_request_rw rw; 144 struct blkif_request_rw rw;
119 struct blkif_request_discard discard; 145 struct blkif_request_discard discard;
120 } u; 146 } u;
121 }; 147 } __attribute__((__packed__));
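The explicit _pad fields and the __packed__ attribute keep the request layout stable now that read/write and discard requests share one structure: in particular, id ends up at the same offset in both union members (8 on x86_64, per the comments above), so a backend can echo it into the response without first inspecting the operation type. An illustrative compile-time check of that invariant, assuming the kernel's offsetof() and BUILD_BUG_ON() helpers:

	/* Not part of the interface - just a sketch of the layout invariant. */
	static inline void blkif_check_request_layout(void)
	{
		BUILD_BUG_ON(offsetof(struct blkif_request, u.rw.id) !=
			     offsetof(struct blkif_request, u.discard.id));
	}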
122 148
123 struct blkif_response { 149 struct blkif_response {
124 uint64_t id; /* copied from request */ 150 uint64_t id; /* copied from request */
125 uint8_t operation; /* copied from request */ 151 uint8_t operation; /* copied from request */
126 int16_t status; /* BLKIF_RSP_??? */ 152 int16_t status; /* BLKIF_RSP_??? */
127 }; 153 };
128 154
129 /* 155 /*
130 * STATUS RETURN CODES. 156 * STATUS RETURN CODES.
131 */ 157 */
132 /* Operation not supported (only happens on barrier writes). */ 158 /* Operation not supported (only happens on barrier writes). */
133 #define BLKIF_RSP_EOPNOTSUPP -2 159 #define BLKIF_RSP_EOPNOTSUPP -2
134 /* Operation failed for some unspecified reason (-EIO). */ 160 /* Operation failed for some unspecified reason (-EIO). */
135 #define BLKIF_RSP_ERROR -1 161 #define BLKIF_RSP_ERROR -1
136 /* Operation completed successfully. */ 162 /* Operation completed successfully. */
137 #define BLKIF_RSP_OKAY 0 163 #define BLKIF_RSP_OKAY 0
138 164
139 /* 165 /*
140 * Generate blkif ring structures and types. 166 * Generate blkif ring structures and types.
141 */ 167 */
142 168
143 DEFINE_RING_TYPES(blkif, struct blkif_request, struct blkif_response); 169 DEFINE_RING_TYPES(blkif, struct blkif_request, struct blkif_response);
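With the ring types generated, a frontend issues a discard by claiming the next request slot and filling only the discard member of the union; secure erase differs only in the flag field. A hedged sketch of that step, where info (a blkfront-style state holding a front ring and feature_secdiscard), req (the block-layer request) and id (the slot's private identifier) are assumed context:

	struct blkif_request *ring_req =
		RING_GET_REQUEST(&info->ring, info->ring.req_prod_pvt);

	ring_req->operation = BLKIF_OP_DISCARD;
	ring_req->u.discard.id = id;	/* echoed back in the response */
	ring_req->u.discard.sector_number = (blkif_sector_t)blk_rq_pos(req);
	ring_req->u.discard.nr_sectors = blk_rq_sectors(req);

	/* Ask for secure erase only if the backend advertised discard-secure. */
	if ((req->cmd_flags & REQ_SECURE) && info->feature_secdiscard)
		ring_req->u.discard.flag = BLKIF_DISCARD_SECURE;
	else
		ring_req->u.discard.flag = 0;

	info->ring.req_prod_pvt++;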
144 170
145 #define VDISK_CDROM 0x1 171 #define VDISK_CDROM 0x1
146 #define VDISK_REMOVABLE 0x2 172 #define VDISK_REMOVABLE 0x2
147 #define VDISK_READONLY 0x4 173 #define VDISK_READONLY 0x4
148 174
149 /* Xen-defined major numbers for virtual disks, they look strangely 175 /* Xen-defined major numbers for virtual disks, they look strangely
150 * familiar */ 176 * familiar */
151 #define XEN_IDE0_MAJOR 3 177 #define XEN_IDE0_MAJOR 3
152 #define XEN_IDE1_MAJOR 22 178 #define XEN_IDE1_MAJOR 22
153 #define XEN_SCSI_DISK0_MAJOR 8 179 #define XEN_SCSI_DISK0_MAJOR 8
154 #define XEN_SCSI_DISK1_MAJOR 65 180 #define XEN_SCSI_DISK1_MAJOR 65
155 #define XEN_SCSI_DISK2_MAJOR 66 181 #define XEN_SCSI_DISK2_MAJOR 66
156 #define XEN_SCSI_DISK3_MAJOR 67 182 #define XEN_SCSI_DISK3_MAJOR 67
157 #define XEN_SCSI_DISK4_MAJOR 68 183 #define XEN_SCSI_DISK4_MAJOR 68
158 #define XEN_SCSI_DISK5_MAJOR 69 184 #define XEN_SCSI_DISK5_MAJOR 69
159 #define XEN_SCSI_DISK6_MAJOR 70 185 #define XEN_SCSI_DISK6_MAJOR 70
160 #define XEN_SCSI_DISK7_MAJOR 71 186 #define XEN_SCSI_DISK7_MAJOR 71
161 #define XEN_SCSI_DISK8_MAJOR 128 187 #define XEN_SCSI_DISK8_MAJOR 128
162 #define XEN_SCSI_DISK9_MAJOR 129 188 #define XEN_SCSI_DISK9_MAJOR 129
163 #define XEN_SCSI_DISK10_MAJOR 130 189 #define XEN_SCSI_DISK10_MAJOR 130
164 #define XEN_SCSI_DISK11_MAJOR 131 190 #define XEN_SCSI_DISK11_MAJOR 131
165 #define XEN_SCSI_DISK12_MAJOR 132 191 #define XEN_SCSI_DISK12_MAJOR 132
166 #define XEN_SCSI_DISK13_MAJOR 133 192 #define XEN_SCSI_DISK13_MAJOR 133
167 #define XEN_SCSI_DISK14_MAJOR 134 193 #define XEN_SCSI_DISK14_MAJOR 134
168 #define XEN_SCSI_DISK15_MAJOR 135 194 #define XEN_SCSI_DISK15_MAJOR 135