Staging: hv: Get rid of IDE details from blkvsc_drv.c
/*
 * Copyright (c) 2009, Microsoft Corporation.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
 * Place - Suite 330, Boston, MA 02111-1307 USA.
 *
 * Authors:
 *   Haiyang Zhang <haiyangz@microsoft.com>
 *   Hank Janssen  <hjanssen@microsoft.com>
 *
 * 4/3/2011: K. Y. Srinivasan - Significant restructuring and cleanup.
 */
#include <linux/init.h>
#include <linux/module.h>
#include <linux/device.h>
#include <linux/blkdev.h>
#include <linux/major.h>
#include <linux/delay.h>
#include <linux/hdreg.h>
#include <linux/mutex.h>
#include <linux/slab.h>
#include <scsi/scsi.h>
#include <scsi/scsi_cmnd.h>
#include <scsi/scsi_eh.h>
#include <scsi/scsi_dbg.h>
#include "hv_api.h"
#include "logging.h"
#include "version_info.h"
#include "vmbus.h"
#include "storvsc_api.h"

#define BLKVSC_MINORS	64

enum blkvsc_device_type {
	UNKNOWN_DEV_TYPE,
	HARDDISK_TYPE,
	DVD_TYPE,
};

enum blkvsc_op_type {
	DO_INQUIRY,
	DO_CAPACITY,
	DO_FLUSH,
};

/*
 * This structure ties the struct request and the struct
 * blkvsc_request/hv_storvsc_request together. A struct request may be
 * represented by one or more struct blkvsc_request instances.
 */
struct blkvsc_request_group {
	int outstanding;
	int status;
	struct list_head blkvsc_req_list;	/* list of blkvsc_requests */
};

struct blkvsc_request {
	/* blkvsc_request_group.blkvsc_req_list */
	struct list_head req_entry;

	/* block_device_context.pending_list */
	struct list_head pend_entry;

	/* This may be NULL if we generate a request internally */
	struct request *req;

	struct block_device_context *dev;

	/* The group this request is part of. May be NULL. */
	struct blkvsc_request_group *group;

	int write;
	sector_t sector_start;
	unsigned long sector_count;

	unsigned char sense_buffer[SCSI_SENSE_BUFFERSIZE];
	unsigned char cmd_len;
	unsigned char cmnd[MAX_COMMAND_SIZE];

	struct hv_storvsc_request request;
};

/* Per device structure */
struct block_device_context {
	/* point back to our device context */
	struct hv_device *device_ctx;
	struct kmem_cache *request_pool;
	spinlock_t lock;
	struct gendisk *gd;
	enum blkvsc_device_type device_type;
	struct list_head pending_list;

	unsigned char device_id[64];
	unsigned int device_id_len;
	int num_outstanding_reqs;
	int shutting_down;
	unsigned int sector_size;
	sector_t capacity;
	unsigned int port;
	unsigned char path;
	unsigned char target;
	int users;
};

static DEFINE_MUTEX(blkvsc_mutex);

static const char *g_blk_driver_name = "blkvsc";

/* {32412632-86cb-44a2-9b5c-50d1417354f5} */
static const struct hv_guid g_blk_device_type = {
	.data = {
		0x32, 0x26, 0x41, 0x32, 0xcb, 0x86, 0xa2, 0x44,
		0x9b, 0x5c, 0x50, 0xd1, 0x41, 0x73, 0x54, 0xf5
	}
};
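
/*
 * Note on byte order: the first three GUID fields are stored
 * little-endian, so the textual 32412632-86cb-44a2 becomes the byte
 * sequence 0x32 0x26 0x41 0x32, 0xcb 0x86, 0xa2 0x44 above; the last
 * two fields are stored as written.
 */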

/*
 * There is a circular dependency involving blkvsc_request_completion()
 * and blkvsc_do_request().
 */
static void blkvsc_request_completion(struct hv_storvsc_request *request);

static int blkvsc_ringbuffer_size = BLKVSC_RING_BUFFER_SIZE;

module_param(blkvsc_ringbuffer_size, int, S_IRUGO);
MODULE_PARM_DESC(blkvsc_ringbuffer_size, "Ring buffer size (in bytes)");

/*
 * There is a circular dependency involving blkvsc_probe()
 * and block_ops.
 */
static int blkvsc_probe(struct device *dev);

static int blk_vsc_on_device_add(struct hv_device *device,
				 void *additional_info)
{
	struct storvsc_device_info *device_info;
	int ret = 0;

	device_info = (struct storvsc_device_info *)additional_info;

	ret = stor_vsc_on_device_add(device, additional_info);
	if (ret != 0)
		return ret;

	/*
	 * We need to use the device instance guid to set the path and target
	 * id. For IDE devices, the device instance id is formatted as
	 * <bus id> - <device id> - 8899 - 000000000000.
	 */
	device_info->path_id = device->dev_instance.data[3] << 24 |
			       device->dev_instance.data[2] << 16 |
			       device->dev_instance.data[1] << 8  |
			       device->dev_instance.data[0];

	device_info->target_id = device->dev_instance.data[5] << 8 |
				 device->dev_instance.data[4];

	return ret;
}

static int blk_vsc_initialize(struct hv_driver *driver)
{
	struct storvsc_driver_object *stor_driver;
	int ret = 0;

	stor_driver = hvdr_to_stordr(driver);

	/* Make sure we are at least 2 pages since 1 page is used for control */

	driver->name = g_blk_driver_name;
	memcpy(&driver->dev_type, &g_blk_device_type, sizeof(struct hv_guid));

	/*
	 * Divide the ring buffer data size (which is 1 page less than the ring
	 * buffer size since that page is reserved for the ring buffer indices)
	 * by the max request size (which is
	 * vmbus_channel_packet_multipage_buffer + struct vstor_packet + u64)
	 */
	stor_driver->max_outstanding_req_per_channel =
		((stor_driver->ring_buffer_size - PAGE_SIZE) /
		  ALIGN(MAX_MULTIPAGE_BUFFER_PACKET +
			sizeof(struct vstor_packet) + sizeof(u64),
			sizeof(u64)));
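
	/*
	 * Illustrative arithmetic only (the sizes here are made up, not
	 * taken from the headers): with a 64 KB ring buffer, one 4 KB page
	 * reserved for the indices and a worst-case aligned packet of, say,
	 * 512 bytes, the channel could carry (65536 - 4096) / 512 = 120
	 * outstanding requests.
	 */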

	DPRINT_INFO(BLKVSC, "max io outstd %u",
		    stor_driver->max_outstanding_req_per_channel);

	/* Setup the dispatch table */
	stor_driver->base.dev_add = blk_vsc_on_device_add;
	stor_driver->base.dev_rm = stor_vsc_on_device_remove;
	stor_driver->base.cleanup = stor_vsc_on_cleanup;
	stor_driver->on_io_request = stor_vsc_on_io_request;

	return ret;
}

static int blkvsc_submit_request(struct blkvsc_request *blkvsc_req,
		void (*request_completion)(struct hv_storvsc_request *))
{
	struct block_device_context *blkdev = blkvsc_req->dev;
	struct hv_device *device_ctx = blkdev->device_ctx;
	struct hv_driver *drv = drv_to_hv_drv(device_ctx->device.driver);
	struct storvsc_driver_object *storvsc_drv_obj = drv->priv;
	struct hv_storvsc_request *storvsc_req;
	struct vmscsi_request *vm_srb;
	int ret;

	storvsc_req = &blkvsc_req->request;
	vm_srb = &storvsc_req->vstor_packet.vm_srb;

	vm_srb->data_in = blkvsc_req->write ? WRITE_TYPE : READ_TYPE;

	storvsc_req->on_io_completion = request_completion;
	storvsc_req->context = blkvsc_req;

	vm_srb->port_number = blkdev->port;
	vm_srb->path_id = blkdev->path;
	vm_srb->target_id = blkdev->target;
	vm_srb->lun = 0;	/* this is not really used at all */

	vm_srb->cdb_length = blkvsc_req->cmd_len;

	memcpy(vm_srb->cdb, blkvsc_req->cmnd, vm_srb->cdb_length);

	storvsc_req->sense_buffer = blkvsc_req->sense_buffer;

	ret = storvsc_drv_obj->on_io_request(blkdev->device_ctx,
					     &blkvsc_req->request);
	if (ret == 0)
		blkdev->num_outstanding_reqs++;

	return ret;
}

static int blkvsc_open(struct block_device *bdev, fmode_t mode)
{
	struct block_device_context *blkdev = bdev->bd_disk->private_data;

	spin_lock(&blkdev->lock);

	blkdev->users++;

	spin_unlock(&blkdev->lock);

	return 0;
}

static int blkvsc_getgeo(struct block_device *bd, struct hd_geometry *hg)
{
	sector_t nsect = get_capacity(bd->bd_disk);
	sector_t cylinders = nsect;

	/*
	 * We are making up these values; let us keep it simple.
	 */
	hg->heads = 0xff;
	hg->sectors = 0x3f;
	sector_div(cylinders, hg->heads * hg->sectors);
	hg->cylinders = cylinders;
	if ((sector_t)(hg->cylinders + 1) * hg->heads * hg->sectors < nsect)
		hg->cylinders = 0xffff;
	return 0;
}
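
/*
 * 255 heads x 63 sectors/track is the conventional made-up geometry
 * reported by LBA-era block drivers; only the cylinder count varies
 * with capacity, and it saturates at 0xffff for large disks.
 */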

static void blkvsc_init_rw(struct blkvsc_request *blkvsc_req)
{
	blkvsc_req->cmd_len = 16;

	if (rq_data_dir(blkvsc_req->req)) {
		blkvsc_req->write = 1;
		blkvsc_req->cmnd[0] = WRITE_16;
	} else {
		blkvsc_req->write = 0;
		blkvsc_req->cmnd[0] = READ_16;
	}

	blkvsc_req->cmnd[1] |=
		(blkvsc_req->req->cmd_flags & REQ_FUA) ? 0x8 : 0;

	*(unsigned long long *)&blkvsc_req->cmnd[2] =
		cpu_to_be64(blkvsc_req->sector_start);
	*(unsigned int *)&blkvsc_req->cmnd[10] =
		cpu_to_be32(blkvsc_req->sector_count);
}
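
/*
 * Resulting READ(16)/WRITE(16) CDB layout: byte 0 is the opcode, bit 3
 * of byte 1 is FUA, bytes 2-9 hold the starting LBA and bytes 10-13 the
 * transfer length, both big-endian (hence the cpu_to_be conversions
 * above). The raw pointer casts assume unaligned stores are tolerated,
 * which holds on the x86 machines Hyper-V guests run on.
 */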

static int blkvsc_ioctl(struct block_device *bd, fmode_t mode,
			unsigned cmd, unsigned long arg)
{
	struct block_device_context *blkdev = bd->bd_disk->private_data;
	int ret = 0;

	switch (cmd) {
	case HDIO_GET_IDENTITY:
		if (copy_to_user((void __user *)arg, blkdev->device_id,
				 blkdev->device_id_len))
			ret = -EFAULT;
		break;
	default:
		ret = -EINVAL;
		break;
	}

	return ret;
}
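
/*
 * Note that this HDIO_GET_IDENTITY differs from IDE's: instead of a
 * struct hd_driveid, the caller receives the raw VPD page 0x83
 * identifier (at most 64 bytes) captured by DO_INQUIRY at probe time.
 */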

static void blkvsc_cmd_completion(struct hv_storvsc_request *request)
{
	struct blkvsc_request *blkvsc_req =
			(struct blkvsc_request *)request->context;
	struct block_device_context *blkdev =
			(struct block_device_context *)blkvsc_req->dev;
	struct scsi_sense_hdr sense_hdr;
	struct vmscsi_request *vm_srb;

	vm_srb = &blkvsc_req->request.vstor_packet.vm_srb;
	blkdev->num_outstanding_reqs--;

	if (vm_srb->scsi_status)
		if (scsi_normalize_sense(blkvsc_req->sense_buffer,
					 SCSI_SENSE_BUFFERSIZE, &sense_hdr))
			scsi_print_sense_hdr("blkvsc", &sense_hdr);

	complete(&blkvsc_req->request.wait_event);
}

static int blkvsc_do_operation(struct block_device_context *blkdev,
			       enum blkvsc_op_type op)
{
	struct blkvsc_request *blkvsc_req;
	struct page *page_buf;
	unsigned char *buf;
	unsigned char device_type;
	struct scsi_sense_hdr sense_hdr;
	struct vmscsi_request *vm_srb;
	int ret = 0;

	blkvsc_req = kmem_cache_zalloc(blkdev->request_pool, GFP_KERNEL);
	if (!blkvsc_req)
		return -ENOMEM;

	page_buf = alloc_page(GFP_KERNEL);
	if (!page_buf) {
		/* blkvsc_req->dev is not set yet; free via blkdev */
		kmem_cache_free(blkdev->request_pool, blkvsc_req);
		return -ENOMEM;
	}

	vm_srb = &blkvsc_req->request.vstor_packet.vm_srb;
	init_completion(&blkvsc_req->request.wait_event);
	blkvsc_req->dev = blkdev;
	blkvsc_req->req = NULL;
	blkvsc_req->write = 0;

	blkvsc_req->request.data_buffer.pfn_array[0] = page_to_pfn(page_buf);
	blkvsc_req->request.data_buffer.offset = 0;

	switch (op) {
	case DO_INQUIRY:
		blkvsc_req->cmnd[0] = INQUIRY;
		blkvsc_req->cmnd[1] = 0x1;	/* EVPD: vital product data */
		blkvsc_req->cmnd[2] = 0x83;	/* VPD page 0x83: device id */
		blkvsc_req->cmnd[4] = 64;
		blkvsc_req->cmd_len = 6;
		blkvsc_req->request.data_buffer.len = 64;
		break;

	case DO_CAPACITY:
		blkdev->sector_size = 0;
		blkdev->capacity = 0;

		blkvsc_req->cmnd[0] = READ_CAPACITY;
		blkvsc_req->cmd_len = 10;	/* READ CAPACITY(10) CDB */
		blkvsc_req->request.data_buffer.len = 8;
		break;

	case DO_FLUSH:
		blkvsc_req->cmnd[0] = SYNCHRONIZE_CACHE;
		blkvsc_req->cmd_len = 10;
		blkvsc_req->request.data_buffer.pfn_array[0] = 0;
		blkvsc_req->request.data_buffer.len = 0;
		break;
	default:
		ret = -EINVAL;
		goto cleanup;
	}

	blkvsc_submit_request(blkvsc_req, blkvsc_cmd_completion);

	wait_for_completion_interruptible(&blkvsc_req->request.wait_event);

	/* check error */
	if (vm_srb->scsi_status) {
		scsi_normalize_sense(blkvsc_req->sense_buffer,
				     SCSI_SENSE_BUFFERSIZE, &sense_hdr);
		/* don't leak the page and request on the error path */
		goto cleanup;
	}

	buf = kmap(page_buf);

	switch (op) {
	case DO_INQUIRY:
		device_type = buf[0] & 0x1F;

		if (device_type == 0x0)
			blkdev->device_type = HARDDISK_TYPE;
		else
			blkdev->device_type = UNKNOWN_DEV_TYPE;

		blkdev->device_id_len = buf[7];
		if (blkdev->device_id_len > 64)
			blkdev->device_id_len = 64;

		memcpy(blkdev->device_id, &buf[8], blkdev->device_id_len);
		break;

	case DO_CAPACITY:
		/*
		 * READ CAPACITY returns big-endian values: the last LBA
		 * (hence the +1) and the block size in bytes.
		 */
		blkdev->capacity =
			((buf[0] << 24) | (buf[1] << 16) |
			 (buf[2] << 8) | buf[3]) + 1;

		blkdev->sector_size =
			(buf[4] << 24) | (buf[5] << 16) |
			(buf[6] << 8) | buf[7];
		break;
	default:
		break;
	}

	kunmap(page_buf);

cleanup:
	__free_page(page_buf);

	kmem_cache_free(blkdev->request_pool, blkvsc_req);

	return ret;
}

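/*
 * Fail every queued request with -EIO. Both callers (blkvsc_remove()
 * and blkvsc_shutdown()) invoke this with blkdev->lock held, which is
 * what protects the list manipulation below.
 */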
static int blkvsc_cancel_pending_reqs(struct block_device_context *blkdev)
{
	struct blkvsc_request *pend_req, *tmp;
	struct blkvsc_request *comp_req, *tmp2;
	struct vmscsi_request *vm_srb;
	int ret = 0;

	/* Flush the pending list first */
	list_for_each_entry_safe(pend_req, tmp, &blkdev->pending_list,
				 pend_entry) {
		/*
		 * The pend_req could be part of a partially completed
		 * request. If so, complete those req first until we
		 * hit the pend_req
		 */
		list_for_each_entry_safe(comp_req, tmp2,
					 &pend_req->group->blkvsc_req_list,
					 req_entry) {

			if (comp_req == pend_req)
				break;

			list_del(&comp_req->req_entry);

			if (comp_req->req) {
				vm_srb = &comp_req->request.vstor_packet.vm_srb;
				ret = __blk_end_request(comp_req->req,
					(!vm_srb->scsi_status ? 0 : -EIO),
					comp_req->sector_count *
					blkdev->sector_size);

				/* FIXME: shouldn't this do more than return? */
				if (ret)
					goto out;
			}

			kmem_cache_free(blkdev->request_pool, comp_req);
		}

		list_del(&pend_req->pend_entry);

		list_del(&pend_req->req_entry);

		if (pend_req->req) {
			if (!__blk_end_request(pend_req->req, -EIO,
					       pend_req->sector_count *
					       blkdev->sector_size)) {
				/*
				 * All the sectors have been xferred ie the
				 * request is done
				 */
				kmem_cache_free(blkdev->request_pool,
						pend_req->group);
			}
		}

		kmem_cache_free(blkdev->request_pool, pend_req);
	}

out:
	return ret;
}

/*
 * blkvsc_remove() - Callback when our device is removed
 */
static int blkvsc_remove(struct device *device)
{
	struct hv_driver *drv = drv_to_hv_drv(device->driver);
	struct storvsc_driver_object *storvsc_drv_obj = drv->priv;
	struct hv_device *device_obj = device_to_hv_device(device);
	struct block_device_context *blkdev = dev_get_drvdata(device);
	unsigned long flags;

	/*
	 * Call to the vsc driver to let it know that the device is being
	 * removed
	 */
	storvsc_drv_obj->base.dev_rm(device_obj);

	/* Get to a known state */
	spin_lock_irqsave(&blkdev->lock, flags);

	blkdev->shutting_down = 1;

	blk_stop_queue(blkdev->gd->queue);

	spin_unlock_irqrestore(&blkdev->lock, flags);

	while (blkdev->num_outstanding_reqs) {
		DPRINT_INFO(STORVSC, "waiting for %d requests to complete...",
			    blkdev->num_outstanding_reqs);
		udelay(100);
	}

	blkvsc_do_operation(blkdev, DO_FLUSH);

	spin_lock_irqsave(&blkdev->lock, flags);

	blkvsc_cancel_pending_reqs(blkdev);

	spin_unlock_irqrestore(&blkdev->lock, flags);

	blk_cleanup_queue(blkdev->gd->queue);

	del_gendisk(blkdev->gd);

	kmem_cache_destroy(blkdev->request_pool);

	kfree(blkdev);

	return 0;
}

static void blkvsc_shutdown(struct device *device)
{
	struct block_device_context *blkdev = dev_get_drvdata(device);
	unsigned long flags;

	if (!blkdev)
		return;

	spin_lock_irqsave(&blkdev->lock, flags);

	blkdev->shutting_down = 1;

	blk_stop_queue(blkdev->gd->queue);

	spin_unlock_irqrestore(&blkdev->lock, flags);

	while (blkdev->num_outstanding_reqs) {
		DPRINT_INFO(STORVSC, "waiting for %d requests to complete...",
			    blkdev->num_outstanding_reqs);
		udelay(100);
	}

	blkvsc_do_operation(blkdev, DO_FLUSH);

	spin_lock_irqsave(&blkdev->lock, flags);

	blkvsc_cancel_pending_reqs(blkdev);

	spin_unlock_irqrestore(&blkdev->lock, flags);
}

static int blkvsc_release(struct gendisk *disk, fmode_t mode)
{
	struct block_device_context *blkdev = disk->private_data;

	mutex_lock(&blkvsc_mutex);
	spin_lock(&blkdev->lock);
	if (blkdev->users == 1) {
		spin_unlock(&blkdev->lock);
		blkvsc_do_operation(blkdev, DO_FLUSH);
		spin_lock(&blkdev->lock);
	}

	blkdev->users--;

	spin_unlock(&blkdev->lock);
	mutex_unlock(&blkvsc_mutex);
	return 0;
}

/*
 * We break the request into one or more blkvsc_requests and submit
 * them. If we can't submit them all, we put them on the pending_list.
 * The blkvsc_request() queue handler will work through the pending_list
 * later. Returns 0 if everything was submitted, 1 if some requests had
 * to be deferred, or -ENOMEM.
 */
static int blkvsc_do_request(struct block_device_context *blkdev,
			     struct request *req)
{
	struct bio *bio = NULL;
	struct bio_vec *bvec = NULL;
	struct bio_vec *prev_bvec = NULL;
	struct blkvsc_request *blkvsc_req = NULL;
	struct blkvsc_request *tmp;
	int databuf_idx = 0;
	int seg_idx = 0;
	sector_t start_sector;
	unsigned long num_sectors = 0;
	int ret = 0;
	int pending = 0;
	struct blkvsc_request_group *group = NULL;

	/* Create a group to tie req to list of blkvsc_reqs */
	group = kmem_cache_zalloc(blkdev->request_pool, GFP_ATOMIC);
	if (!group)
		return -ENOMEM;

	INIT_LIST_HEAD(&group->blkvsc_req_list);
	group->outstanding = group->status = 0;

	start_sector = blk_rq_pos(req);

	/* foreach bio in the request */
	if (req->bio) {
		for (bio = req->bio; bio; bio = bio->bi_next) {
			/*
			 * Map this bio into an existing or new storvsc
			 * request
			 */
			bio_for_each_segment(bvec, bio, seg_idx) {
				/*
				 * We need a new blkvsc_request on the first
				 * pass, when the current one is full, or
				 * when the bvec leaves a hole at the
				 * beginning or end of a page.
				 */
				if ((!blkvsc_req) ||
				    (databuf_idx >= MAX_MULTIPAGE_BUFFER_COUNT) ||
				    (bvec->bv_offset != 0) ||
				    (prev_bvec &&
				     (prev_bvec->bv_len != PAGE_SIZE))) {
					/* submit the prev one */
					if (blkvsc_req) {
						blkvsc_req->sector_start =
							start_sector;
						sector_div(
							blkvsc_req->sector_start,
							(blkdev->sector_size >> 9));

						blkvsc_req->sector_count =
							num_sectors /
							(blkdev->sector_size >> 9);
						blkvsc_init_rw(blkvsc_req);
					}

					/*
					 * Create new blkvsc_req to represent
					 * the current bvec
					 */
					blkvsc_req = kmem_cache_zalloc(
						blkdev->request_pool,
						GFP_ATOMIC);
					if (!blkvsc_req) {
						/* free up everything */
						list_for_each_entry_safe(
							blkvsc_req, tmp,
							&group->blkvsc_req_list,
							req_entry) {
							list_del(&blkvsc_req->req_entry);
							kmem_cache_free(
								blkdev->request_pool,
								blkvsc_req);
						}

						kmem_cache_free(
							blkdev->request_pool,
							group);
						return -ENOMEM;
					}

					blkvsc_req->dev = blkdev;
					blkvsc_req->req = req;
					blkvsc_req->request.data_buffer.offset =
						bvec->bv_offset;
					blkvsc_req->request.data_buffer.len = 0;

					/* Add to the group */
					blkvsc_req->group = group;
					blkvsc_req->group->outstanding++;
					list_add_tail(&blkvsc_req->req_entry,
						&blkvsc_req->group->blkvsc_req_list);

					start_sector += num_sectors;
					num_sectors = 0;
					databuf_idx = 0;
				}

				/*
				 * Add the curr bvec/segment to the curr
				 * blkvsc_req
				 */
				blkvsc_req->request.data_buffer.pfn_array[databuf_idx] =
					page_to_pfn(bvec->bv_page);
				blkvsc_req->request.data_buffer.len +=
					bvec->bv_len;

				prev_bvec = bvec;

				databuf_idx++;
				num_sectors += bvec->bv_len >> 9;

			} /* bio_for_each_segment */

		} /* rq_for_each_bio */
	}

	/* Handle the last one */
	if (blkvsc_req) {
		blkvsc_req->sector_start = start_sector;
		sector_div(blkvsc_req->sector_start,
			   (blkdev->sector_size >> 9));

		blkvsc_req->sector_count = num_sectors /
					   (blkdev->sector_size >> 9);

		blkvsc_init_rw(blkvsc_req);
	}

	list_for_each_entry(blkvsc_req, &group->blkvsc_req_list, req_entry) {
		if (pending) {
			list_add_tail(&blkvsc_req->pend_entry,
				      &blkdev->pending_list);
		} else {
			ret = blkvsc_submit_request(blkvsc_req,
						    blkvsc_request_completion);
			if (ret == -1) {
				pending = 1;
				list_add_tail(&blkvsc_req->pend_entry,
					      &blkdev->pending_list);
			}
		}
	}

	return pending;
}

static int blkvsc_do_pending_reqs(struct block_device_context *blkdev)
{
	struct blkvsc_request *pend_req, *tmp;
	int ret = 0;

	/* Flush the pending list first */
	list_for_each_entry_safe(pend_req, tmp, &blkdev->pending_list,
				 pend_entry) {

		ret = blkvsc_submit_request(pend_req,
					    blkvsc_request_completion);
		if (ret != 0)
			break;
		else
			list_del(&pend_req->pend_entry);
	}

	return ret;
}

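/*
 * Our request_fn. The block layer calls it with the queue lock held
 * (the same blkdev->lock handed to blk_init_queue() in blkvsc_probe()),
 * so the pending-list handling here needs no extra locking.
 */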
static void blkvsc_request(struct request_queue *queue)
{
	struct block_device_context *blkdev = NULL;
	struct request *req;
	int ret = 0;

	while ((req = blk_peek_request(queue)) != NULL) {

		blkdev = req->rq_disk->private_data;
		if (blkdev->shutting_down || req->cmd_type != REQ_TYPE_FS) {
			__blk_end_request_cur(req, 0);
			continue;
		}

		ret = blkvsc_do_pending_reqs(blkdev);

		if (ret != 0) {
			blk_stop_queue(queue);
			break;
		}

		blk_start_request(req);

		ret = blkvsc_do_request(blkdev, req);
		if (ret > 0) {
			blk_stop_queue(queue);
			break;
		} else if (ret < 0) {
			blk_requeue_request(queue, req);
			blk_stop_queue(queue);
			break;
		}
	}
}

/* The one and only one */
static struct storvsc_driver_object g_blkvsc_drv;

static const struct block_device_operations block_ops = {
	.owner = THIS_MODULE,
	.open = blkvsc_open,
	.release = blkvsc_release,
	.getgeo = blkvsc_getgeo,
	.ioctl  = blkvsc_ioctl,
};

/*
 * blkvsc_drv_init - BlkVsc driver initialization.
 */
static int blkvsc_drv_init(void)
{
	struct storvsc_driver_object *storvsc_drv_obj = &g_blkvsc_drv;
	struct hv_driver *drv = &g_blkvsc_drv.base;
	int ret;

	storvsc_drv_obj->ring_buffer_size = blkvsc_ringbuffer_size;

	drv->priv = storvsc_drv_obj;

	/* Callback to client driver to complete the initialization */
	blk_vsc_initialize(&storvsc_drv_obj->base);

	drv->driver.name = storvsc_drv_obj->base.name;

	drv->driver.probe = blkvsc_probe;
	drv->driver.remove = blkvsc_remove;
	drv->driver.shutdown = blkvsc_shutdown;

	/* The driver belongs to vmbus */
	ret = vmbus_child_driver_register(&drv->driver);

	return ret;
}

static int blkvsc_drv_exit_cb(struct device *dev, void *data)
{
	struct device **curr = (struct device **)data;
	*curr = dev;
	return 1; /* stop iterating */
}

static void blkvsc_drv_exit(void)
{
	struct storvsc_driver_object *storvsc_drv_obj = &g_blkvsc_drv;
	struct hv_driver *drv = &g_blkvsc_drv.base;
	struct device *current_dev;
	int ret;

	while (1) {
		current_dev = NULL;

		/* Get the device */
		ret = driver_for_each_device(&drv->driver, NULL,
					     (void *)&current_dev,
					     blkvsc_drv_exit_cb);

		if (ret)
			DPRINT_WARN(BLKVSC_DRV,
				    "driver_for_each_device returned %d", ret);

		if (current_dev == NULL)
			break;

		/* Initiate removal from the top-down */
		device_unregister(current_dev);
	}

	if (storvsc_drv_obj->base.cleanup)
		storvsc_drv_obj->base.cleanup(&storvsc_drv_obj->base);

	vmbus_child_driver_unregister(&drv->driver);
}

/*
 * blkvsc_probe - Add a new device for this driver
 */
static int blkvsc_probe(struct device *device)
{
	struct hv_driver *drv = drv_to_hv_drv(device->driver);
	struct storvsc_driver_object *storvsc_drv_obj = drv->priv;
	struct hv_device *device_obj = device_to_hv_device(device);
	struct block_device_context *blkdev = NULL;
	struct storvsc_device_info device_info;
	struct storvsc_major_info major_info;
	int ret = 0;

	blkdev = kzalloc(sizeof(struct block_device_context), GFP_KERNEL);
	if (!blkdev) {
		ret = -ENOMEM;
		goto cleanup;
	}

	INIT_LIST_HEAD(&blkdev->pending_list);

	/* Initialize what we can here */
	spin_lock_init(&blkdev->lock);

	blkdev->request_pool = kmem_cache_create(dev_name(&device_obj->device),
					sizeof(struct blkvsc_request), 0,
					SLAB_HWCACHE_ALIGN, NULL);
	if (!blkdev->request_pool) {
		ret = -ENOMEM;
		goto cleanup;
	}

	/* Call to the vsc driver to add the device */
	ret = storvsc_drv_obj->base.dev_add(device_obj, &device_info);
	if (ret != 0)
		goto cleanup;

	blkdev->device_ctx = device_obj;
	/* this identifies the device, 0 or 1 */
	blkdev->target = device_info.target_id;
	/* this identifies the IDE controller, 0 or 1 */
	blkdev->path = device_info.path_id;

	dev_set_drvdata(device, blkdev);

	ret = stor_vsc_get_major_info(&device_info, &major_info);
	if (ret)
		goto cleanup;

	if (major_info.do_register) {
		ret = register_blkdev(major_info.major, major_info.devname);
		if (ret != 0) {
			DPRINT_ERR(BLKVSC_DRV,
				   "register_blkdev() failed! ret %d", ret);
			goto remove;
		}
	}

	DPRINT_INFO(BLKVSC_DRV, "blkvsc registered for major %d!!",
		    major_info.major);

	blkdev->gd = alloc_disk(BLKVSC_MINORS);
	if (!blkdev->gd) {
		ret = -ENOMEM;
		goto cleanup;
	}

	blkdev->gd->queue = blk_init_queue(blkvsc_request, &blkdev->lock);

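	/*
	 * These limits mirror the vmbus multipage buffer filled in by
	 * blkvsc_do_request(): at most MAX_MULTIPAGE_BUFFER_COUNT segments,
	 * each confined to a single page, since the transport describes
	 * data as one PFN per page.
	 */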
	blk_queue_max_segment_size(blkdev->gd->queue, PAGE_SIZE);
	blk_queue_max_segments(blkdev->gd->queue, MAX_MULTIPAGE_BUFFER_COUNT);
	blk_queue_segment_boundary(blkdev->gd->queue, PAGE_SIZE-1);
	blk_queue_bounce_limit(blkdev->gd->queue, BLK_BOUNCE_ANY);
	blk_queue_dma_alignment(blkdev->gd->queue, 511);

	blkdev->gd->major = major_info.major;
	if (major_info.index == 1 || major_info.index == 3)
		blkdev->gd->first_minor = BLKVSC_MINORS;
	else
		blkdev->gd->first_minor = 0;
	blkdev->gd->fops = &block_ops;
	blkdev->gd->events = DISK_EVENT_MEDIA_CHANGE;
	blkdev->gd->private_data = blkdev;
	blkdev->gd->driverfs_dev = &(blkdev->device_ctx->device);
	sprintf(blkdev->gd->disk_name, "hd%c", 'a' + major_info.index);
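	/*
	 * index 0..3 yields hda..hdd, matching the emulated IDE layout:
	 * each legacy IDE major spans two disks of BLKVSC_MINORS (64)
	 * minors apiece, so the slave devices (index 1 and 3) start at
	 * first_minor 64 above.
	 */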

	blkvsc_do_operation(blkdev, DO_INQUIRY);
	blkvsc_do_operation(blkdev, DO_CAPACITY);

	set_capacity(blkdev->gd, blkdev->capacity * (blkdev->sector_size/512));
	blk_queue_logical_block_size(blkdev->gd->queue, blkdev->sector_size);
	/* go! */
	add_disk(blkdev->gd);

	DPRINT_INFO(BLKVSC_DRV, "%s added!! capacity %lu sector_size %d",
		    blkdev->gd->disk_name, (unsigned long)blkdev->capacity,
		    blkdev->sector_size);

	return ret;

remove:
	storvsc_drv_obj->base.dev_rm(device_obj);

cleanup:
	if (blkdev) {
		if (blkdev->request_pool) {
			kmem_cache_destroy(blkdev->request_pool);
			blkdev->request_pool = NULL;
		}
		kfree(blkdev);
		blkdev = NULL;
	}

	return ret;
}

static void blkvsc_request_completion(struct hv_storvsc_request *request)
{
	struct blkvsc_request *blkvsc_req =
			(struct blkvsc_request *)request->context;
	struct block_device_context *blkdev =
			(struct block_device_context *)blkvsc_req->dev;
	unsigned long flags;
	struct blkvsc_request *comp_req, *tmp;
	struct vmscsi_request *vm_srb;

	spin_lock_irqsave(&blkdev->lock, flags);

	blkdev->num_outstanding_reqs--;
	blkvsc_req->group->outstanding--;

	/*
	 * Only start processing when all the blkvsc_reqs are
	 * completed. This guarantees no out-of-order blkvsc_req
	 * completion when calling end_that_request_first()
	 */
	if (blkvsc_req->group->outstanding == 0) {
		list_for_each_entry_safe(comp_req, tmp,
					 &blkvsc_req->group->blkvsc_req_list,
					 req_entry) {

			list_del(&comp_req->req_entry);

			vm_srb = &comp_req->request.vstor_packet.vm_srb;
			if (!__blk_end_request(comp_req->req,
				(!vm_srb->scsi_status ? 0 : -EIO),
				comp_req->sector_count * blkdev->sector_size)) {
				/*
				 * All the sectors have been xferred ie the
				 * request is done
				 */
				kmem_cache_free(blkdev->request_pool,
						comp_req->group);
			}

			kmem_cache_free(blkdev->request_pool, comp_req);
		}

		if (!blkdev->shutting_down) {
			blkvsc_do_pending_reqs(blkdev);
			blk_start_queue(blkdev->gd->queue);
			blkvsc_request(blkdev->gd->queue);
		}
	}

	spin_unlock_irqrestore(&blkdev->lock, flags);
}

static int __init blkvsc_init(void)
{
	int ret;

	BUILD_BUG_ON(sizeof(sector_t) != 8);
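	/* i.e. a 64-bit sector_t is required (CONFIG_LBDAF on 32-bit) */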

	ret = blkvsc_drv_init();

	return ret;
}

static void __exit blkvsc_exit(void)
{
	blkvsc_drv_exit();
}

MODULE_LICENSE("GPL");
MODULE_VERSION(HV_DRV_VERSION);
MODULE_DESCRIPTION("Microsoft Hyper-V virtual block driver");
module_init(blkvsc_init);
module_exit(blkvsc_exit);