2 * Copyright (c) 2009, Microsoft Corporation.
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * You should have received a copy of the GNU General Public License along with
14 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
15 * Place - Suite 330, Boston, MA 02111-1307 USA.
18 * Haiyang Zhang <haiyangz@microsoft.com>
19 * Hank Janssen <hjanssen@microsoft.com>
20 * K. Y. Srinivasan <kys@microsoft.com>
23 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
25 #include <linux/init.h>
26 #include <linux/module.h>
27 #include <linux/device.h>
28 #include <linux/irq.h>
29 #include <linux/interrupt.h>
30 #include <linux/sysctl.h>
31 #include <linux/slab.h>
32 #include <linux/acpi.h>
33 #include <acpi/acpi_bus.h>
34 #include <linux/completion.h>
37 #include "hyperv_vmbus.h"
40 static struct acpi_device *hv_acpi_dev;
42 static struct tasklet_struct msg_dpc;
43 static struct tasklet_struct event_dpc;
45 unsigned int vmbus_loglevel = (ALL_MODULES << 16 | INFO_LVL);
46 EXPORT_SYMBOL(vmbus_loglevel);
48 static struct completion probe_event;
51 static void get_channel_info(struct hv_device *device,
52 struct hv_device_info *info)
54 struct vmbus_channel_debug_info debug_info;
59 vmbus_get_debug_info(device->channel, &debug_info);
61 info->chn_id = debug_info.relid;
62 info->chn_state = debug_info.state;
63 memcpy(&info->chn_type, &debug_info.interfacetype,
65 memcpy(&info->chn_instance, &debug_info.interface_instance,
68 info->monitor_id = debug_info.monitorid;
70 info->server_monitor_pending = debug_info.servermonitor_pending;
71 info->server_monitor_latency = debug_info.servermonitor_latency;
72 info->server_monitor_conn_id = debug_info.servermonitor_connectionid;
74 info->client_monitor_pending = debug_info.clientmonitor_pending;
75 info->client_monitor_latency = debug_info.clientmonitor_latency;
76 info->client_monitor_conn_id = debug_info.clientmonitor_connectionid;
78 info->inbound.int_mask = debug_info.inbound.current_interrupt_mask;
79 info->inbound.read_idx = debug_info.inbound.current_read_index;
80 info->inbound.write_idx = debug_info.inbound.current_write_index;
81 info->inbound.bytes_avail_toread =
82 debug_info.inbound.bytes_avail_toread;
83 info->inbound.bytes_avail_towrite =
84 debug_info.inbound.bytes_avail_towrite;
86 info->outbound.int_mask =
87 debug_info.outbound.current_interrupt_mask;
88 info->outbound.read_idx = debug_info.outbound.current_read_index;
89 info->outbound.write_idx = debug_info.outbound.current_write_index;
90 info->outbound.bytes_avail_toread =
91 debug_info.outbound.bytes_avail_toread;
92 info->outbound.bytes_avail_towrite =
93 debug_info.outbound.bytes_avail_towrite;
97 * vmbus_show_device_attr - Show the device attribute in sysfs.
99 * This is invoked when user does a
100 * "cat /sys/bus/vmbus/devices/<busdevice>/<attr name>"
102 static ssize_t vmbus_show_device_attr(struct device *dev,
103 struct device_attribute *dev_attr,
106 struct hv_device *hv_dev = device_to_hv_device(dev);
107 struct hv_device_info device_info;
109 memset(&device_info, 0, sizeof(struct hv_device_info));
111 get_channel_info(hv_dev, &device_info);
113 if (!strcmp(dev_attr->attr.name, "class_id")) {
114 return sprintf(buf, "{%02x%02x%02x%02x-%02x%02x-%02x%02x-"
115 "%02x%02x%02x%02x%02x%02x%02x%02x}\n",
116 device_info.chn_type.b[3],
117 device_info.chn_type.b[2],
118 device_info.chn_type.b[1],
119 device_info.chn_type.b[0],
120 device_info.chn_type.b[5],
121 device_info.chn_type.b[4],
122 device_info.chn_type.b[7],
123 device_info.chn_type.b[6],
124 device_info.chn_type.b[8],
125 device_info.chn_type.b[9],
126 device_info.chn_type.b[10],
127 device_info.chn_type.b[11],
128 device_info.chn_type.b[12],
129 device_info.chn_type.b[13],
130 device_info.chn_type.b[14],
131 device_info.chn_type.b[15]);
132 } else if (!strcmp(dev_attr->attr.name, "device_id")) {
133 return sprintf(buf, "{%02x%02x%02x%02x-%02x%02x-%02x%02x-"
134 "%02x%02x%02x%02x%02x%02x%02x%02x}\n",
135 device_info.chn_instance.b[3],
136 device_info.chn_instance.b[2],
137 device_info.chn_instance.b[1],
138 device_info.chn_instance.b[0],
139 device_info.chn_instance.b[5],
140 device_info.chn_instance.b[4],
141 device_info.chn_instance.b[7],
142 device_info.chn_instance.b[6],
143 device_info.chn_instance.b[8],
144 device_info.chn_instance.b[9],
145 device_info.chn_instance.b[10],
146 device_info.chn_instance.b[11],
147 device_info.chn_instance.b[12],
148 device_info.chn_instance.b[13],
149 device_info.chn_instance.b[14],
150 device_info.chn_instance.b[15]);
151 } else if (!strcmp(dev_attr->attr.name, "state")) {
152 return sprintf(buf, "%d\n", device_info.chn_state);
153 } else if (!strcmp(dev_attr->attr.name, "id")) {
154 return sprintf(buf, "%d\n", device_info.chn_id);
155 } else if (!strcmp(dev_attr->attr.name, "out_intr_mask")) {
156 return sprintf(buf, "%d\n", device_info.outbound.int_mask);
157 } else if (!strcmp(dev_attr->attr.name, "out_read_index")) {
158 return sprintf(buf, "%d\n", device_info.outbound.read_idx);
159 } else if (!strcmp(dev_attr->attr.name, "out_write_index")) {
160 return sprintf(buf, "%d\n", device_info.outbound.write_idx);
161 } else if (!strcmp(dev_attr->attr.name, "out_read_bytes_avail")) {
162 return sprintf(buf, "%d\n",
163 device_info.outbound.bytes_avail_toread);
164 } else if (!strcmp(dev_attr->attr.name, "out_write_bytes_avail")) {
165 return sprintf(buf, "%d\n",
166 device_info.outbound.bytes_avail_towrite);
167 } else if (!strcmp(dev_attr->attr.name, "in_intr_mask")) {
168 return sprintf(buf, "%d\n", device_info.inbound.int_mask);
169 } else if (!strcmp(dev_attr->attr.name, "in_read_index")) {
170 return sprintf(buf, "%d\n", device_info.inbound.read_idx);
171 } else if (!strcmp(dev_attr->attr.name, "in_write_index")) {
172 return sprintf(buf, "%d\n", device_info.inbound.write_idx);
173 } else if (!strcmp(dev_attr->attr.name, "in_read_bytes_avail")) {
174 return sprintf(buf, "%d\n",
175 device_info.inbound.bytes_avail_toread);
176 } else if (!strcmp(dev_attr->attr.name, "in_write_bytes_avail")) {
177 return sprintf(buf, "%d\n",
178 device_info.inbound.bytes_avail_towrite);
179 } else if (!strcmp(dev_attr->attr.name, "monitor_id")) {
180 return sprintf(buf, "%d\n", device_info.monitor_id);
181 } else if (!strcmp(dev_attr->attr.name, "server_monitor_pending")) {
182 return sprintf(buf, "%d\n", device_info.server_monitor_pending);
183 } else if (!strcmp(dev_attr->attr.name, "server_monitor_latency")) {
184 return sprintf(buf, "%d\n", device_info.server_monitor_latency);
185 } else if (!strcmp(dev_attr->attr.name, "server_monitor_conn_id")) {
186 return sprintf(buf, "%d\n",
187 device_info.server_monitor_conn_id);
188 } else if (!strcmp(dev_attr->attr.name, "client_monitor_pending")) {
189 return sprintf(buf, "%d\n", device_info.client_monitor_pending);
190 } else if (!strcmp(dev_attr->attr.name, "client_monitor_latency")) {
191 return sprintf(buf, "%d\n", device_info.client_monitor_latency);
192 } else if (!strcmp(dev_attr->attr.name, "client_monitor_conn_id")) {
193 return sprintf(buf, "%d\n",
194 device_info.client_monitor_conn_id);
200 /* Set up per device attributes in /sys/bus/vmbus/devices/<bus device> */
201 static struct device_attribute vmbus_device_attrs[] = {
202 __ATTR(id, S_IRUGO, vmbus_show_device_attr, NULL),
203 __ATTR(state, S_IRUGO, vmbus_show_device_attr, NULL),
204 __ATTR(class_id, S_IRUGO, vmbus_show_device_attr, NULL),
205 __ATTR(device_id, S_IRUGO, vmbus_show_device_attr, NULL),
206 __ATTR(monitor_id, S_IRUGO, vmbus_show_device_attr, NULL),
208 __ATTR(server_monitor_pending, S_IRUGO, vmbus_show_device_attr, NULL),
209 __ATTR(server_monitor_latency, S_IRUGO, vmbus_show_device_attr, NULL),
210 __ATTR(server_monitor_conn_id, S_IRUGO, vmbus_show_device_attr, NULL),
212 __ATTR(client_monitor_pending, S_IRUGO, vmbus_show_device_attr, NULL),
213 __ATTR(client_monitor_latency, S_IRUGO, vmbus_show_device_attr, NULL),
214 __ATTR(client_monitor_conn_id, S_IRUGO, vmbus_show_device_attr, NULL),
216 __ATTR(out_intr_mask, S_IRUGO, vmbus_show_device_attr, NULL),
217 __ATTR(out_read_index, S_IRUGO, vmbus_show_device_attr, NULL),
218 __ATTR(out_write_index, S_IRUGO, vmbus_show_device_attr, NULL),
219 __ATTR(out_read_bytes_avail, S_IRUGO, vmbus_show_device_attr, NULL),
220 __ATTR(out_write_bytes_avail, S_IRUGO, vmbus_show_device_attr, NULL),
222 __ATTR(in_intr_mask, S_IRUGO, vmbus_show_device_attr, NULL),
223 __ATTR(in_read_index, S_IRUGO, vmbus_show_device_attr, NULL),
224 __ATTR(in_write_index, S_IRUGO, vmbus_show_device_attr, NULL),
225 __ATTR(in_read_bytes_avail, S_IRUGO, vmbus_show_device_attr, NULL),
226 __ATTR(in_write_bytes_avail, S_IRUGO, vmbus_show_device_attr, NULL),
232 * vmbus_uevent - add uevent for our device
234 * This routine is invoked when a device is added or removed on the vmbus to
235 * generate a uevent to udev in the userspace. The udev will then look at its
236 * rule and the uevent generated here to load the appropriate driver
238 * The alias string will be of the form vmbus:guid where guid is the string
239 * representation of the device guid (each byte of the guid will be
240 * represented with two hex characters.
242 static int vmbus_uevent(struct device *device, struct kobj_uevent_env *env)
244 struct hv_device *dev = device_to_hv_device(device);
246 char alias_name[((sizeof((struct hv_vmbus_device_id *)0)->guid) + 1) * 2];
248 for (i = 0; i < ((sizeof((struct hv_vmbus_device_id *)0)->guid) * 2); i += 2)
249 sprintf(&alias_name[i], "%02x", dev->dev_type.b[i/2]);
251 ret = add_uevent_var(env, "MODALIAS=vmbus:%s", alias_name);
255 static uuid_le null_guid;
257 static inline bool is_null_guid(const __u8 *guid)
259 if (memcmp(guid, &null_guid, sizeof(uuid_le)))
266 * vmbus_match - Attempt to match the specified device to the specified driver
268 static int vmbus_match(struct device *device, struct device_driver *driver)
270 struct hv_driver *drv = drv_to_hv_drv(driver);
271 struct hv_device *hv_dev = device_to_hv_device(device);
272 const struct hv_vmbus_device_id *id_array = drv->id_table;
274 for (; !is_null_guid(id_array->guid); id_array++)
275 if (!memcmp(&id_array->guid, &hv_dev->dev_type.b,
283 * vmbus_probe - Add the new vmbus's child device
285 static int vmbus_probe(struct device *child_device)
288 struct hv_driver *drv =
289 drv_to_hv_drv(child_device->driver);
290 struct hv_device *dev = device_to_hv_device(child_device);
293 ret = drv->probe(dev);
295 pr_err("probe failed for device %s (%d)\n",
296 dev_name(child_device), ret);
299 pr_err("probe not set for driver %s\n",
300 dev_name(child_device));
307 * vmbus_remove - Remove a vmbus device
309 static int vmbus_remove(struct device *child_device)
312 struct hv_driver *drv;
314 struct hv_device *dev = device_to_hv_device(child_device);
316 if (child_device->driver) {
317 drv = drv_to_hv_drv(child_device->driver);
320 ret = drv->remove(dev);
322 pr_err("remove not set for driver %s\n",
323 dev_name(child_device));
333 * vmbus_shutdown - Shutdown a vmbus device
335 static void vmbus_shutdown(struct device *child_device)
337 struct hv_driver *drv;
338 struct hv_device *dev = device_to_hv_device(child_device);
341 /* The device may not be attached yet */
342 if (!child_device->driver)
345 drv = drv_to_hv_drv(child_device->driver);
/*
 * vmbus_device_release - Final callback release of the vmbus child device
 * @device: embedded struct device of the hv_device being released
 *
 * Frees the hv_device that vmbus_child_device_create() allocated.
 */
static void vmbus_device_release(struct device *device)
{
	struct hv_device *hv_dev = device_to_hv_device(device);

	kfree(hv_dev);
}
365 /* The one and only one */
366 static struct bus_type hv_bus = {
368 .match = vmbus_match,
369 .shutdown = vmbus_shutdown,
370 .remove = vmbus_remove,
371 .probe = vmbus_probe,
372 .uevent = vmbus_uevent,
373 .dev_attrs = vmbus_device_attrs,
/* Name under which the vmbus interrupt is requested (see request_irq()). */
static const char *driver_name = "hyperv";
379 struct onmessage_work_context {
380 struct work_struct work;
381 struct hv_message msg;
384 static void vmbus_onmessage_work(struct work_struct *work)
386 struct onmessage_work_context *ctx;
388 ctx = container_of(work, struct onmessage_work_context,
390 vmbus_onmessage(&ctx->msg);
395 * vmbus_on_msg_dpc - DPC routine to handle messages from the hypervisor
397 static void vmbus_on_msg_dpc(unsigned long data)
399 int cpu = smp_processor_id();
400 void *page_addr = hv_context.synic_message_page[cpu];
401 struct hv_message *msg = (struct hv_message *)page_addr +
403 struct onmessage_work_context *ctx;
406 if (msg->header.message_type == HVMSG_NONE) {
410 ctx = kmalloc(sizeof(*ctx), GFP_ATOMIC);
413 INIT_WORK(&ctx->work, vmbus_onmessage_work);
414 memcpy(&ctx->msg, msg, sizeof(*msg));
415 queue_work(vmbus_connection.work_queue, &ctx->work);
418 msg->header.message_type = HVMSG_NONE;
421 * Make sure the write to MessageType (ie set to
422 * HVMSG_NONE) happens before we read the
423 * MessagePending and EOMing. Otherwise, the EOMing
424 * will not deliver any more messages since there is
429 if (msg->header.message_flags.msg_pending) {
431 * This will cause message queue rescan to
432 * possibly deliver another msg from the
435 wrmsrl(HV_X64_MSR_EOM, 0);
440 static irqreturn_t vmbus_isr(int irq, void *dev_id)
442 int cpu = smp_processor_id();
444 struct hv_message *msg;
445 union hv_synic_event_flags *event;
446 bool handled = false;
448 page_addr = hv_context.synic_message_page[cpu];
449 msg = (struct hv_message *)page_addr + VMBUS_MESSAGE_SINT;
451 /* Check if there are actual msgs to be process */
452 if (msg->header.message_type != HVMSG_NONE) {
454 tasklet_schedule(&msg_dpc);
457 page_addr = hv_context.synic_event_page[cpu];
458 event = (union hv_synic_event_flags *)page_addr + VMBUS_MESSAGE_SINT;
460 /* Since we are a child, we only need to check bit 0 */
461 if (sync_test_and_clear_bit(0, (unsigned long *) &event->flags32[0])) {
463 tasklet_schedule(&event_dpc);
473 * vmbus_bus_init - Main vmbus driver initialization routine.
476 * - initialize the vmbus driver context
477 * - invoke the vmbus hv main init routine
478 * - get the irq resource
479 * - retrieve the channel offers
481 static int vmbus_bus_init(int irq)
486 /* Hypervisor initialization...setup hypercall page..etc */
489 pr_err("Unable to initialize the hypervisor - 0x%x\n", ret);
493 /* Initialize the bus context */
494 tasklet_init(&msg_dpc, vmbus_on_msg_dpc, 0);
495 tasklet_init(&event_dpc, vmbus_on_event, 0);
497 /* Now, register the bus with LDM */
498 ret = bus_register(&hv_bus);
502 /* Get the interrupt resource */
503 ret = request_irq(irq, vmbus_isr, IRQF_SAMPLE_RANDOM,
504 driver_name, hv_acpi_dev);
507 pr_err("Unable to request IRQ %d\n",
510 bus_unregister(&hv_bus);
515 vector = IRQ0_VECTOR + irq;
518 * Notify the hypervisor of our irq and
519 * connect to the host.
521 on_each_cpu(hv_synic_init, (void *)&vector, 1);
522 ret = vmbus_connect();
524 free_irq(irq, hv_acpi_dev);
525 bus_unregister(&hv_bus);
530 vmbus_request_offers();
536 * __vmbus_child_driver_register - Register a vmbus's driver
537 * @drv: Pointer to driver structure you want to register
538 * @owner: owner module of the drv
539 * @mod_name: module name string
541 * Registers the given driver with Linux through the 'driver_register()' call
542 * and sets up the hyper-v vmbus handling for this driver.
543 * It will return the state of the 'driver_register()' call.
546 int __vmbus_driver_register(struct hv_driver *hv_driver, struct module *owner, const char *mod_name)
550 pr_info("registering driver %s\n", hv_driver->name);
552 hv_driver->driver.name = hv_driver->name;
553 hv_driver->driver.owner = owner;
554 hv_driver->driver.mod_name = mod_name;
555 hv_driver->driver.bus = &hv_bus;
557 ret = driver_register(&hv_driver->driver);
559 vmbus_request_offers();
563 EXPORT_SYMBOL_GPL(__vmbus_driver_register);
566 * vmbus_driver_unregister() - Unregister a vmbus's driver
567 * @drv: Pointer to driver structure you want to un-register
569 * Un-register the given driver that was previous registered with a call to
570 * vmbus_driver_register()
572 void vmbus_driver_unregister(struct hv_driver *hv_driver)
574 pr_info("unregistering driver %s\n", hv_driver->name);
576 driver_unregister(&hv_driver->driver);
579 EXPORT_SYMBOL_GPL(vmbus_driver_unregister);
582 * vmbus_child_device_create - Creates and registers a new child device
585 struct hv_device *vmbus_child_device_create(uuid_le *type,
587 struct vmbus_channel *channel)
589 struct hv_device *child_device_obj;
591 /* Allocate the new child device */
592 child_device_obj = kzalloc(sizeof(struct hv_device), GFP_KERNEL);
593 if (!child_device_obj) {
594 pr_err("Unable to allocate device object for child device\n");
598 child_device_obj->channel = channel;
599 memcpy(&child_device_obj->dev_type, type, sizeof(uuid_le));
600 memcpy(&child_device_obj->dev_instance, instance,
604 return child_device_obj;
608 * vmbus_child_device_register - Register the child device
610 int vmbus_child_device_register(struct hv_device *child_device_obj)
614 static atomic_t device_num = ATOMIC_INIT(0);
616 /* Set the device name. Otherwise, device_register() will fail. */
617 dev_set_name(&child_device_obj->device, "vmbus_0_%d",
618 atomic_inc_return(&device_num));
620 /* The new device belongs to this bus */
621 child_device_obj->device.bus = &hv_bus; /* device->dev.bus; */
622 child_device_obj->device.parent = &hv_acpi_dev->dev;
623 child_device_obj->device.release = vmbus_device_release;
626 * Register with the LDM. This will kick off the driver/device
627 * binding...which will eventually call vmbus_match() and vmbus_probe()
629 ret = device_register(&child_device_obj->device);
632 pr_err("Unable to register child device\n");
634 pr_info("child device %s registered\n",
635 dev_name(&child_device_obj->device));
641 * vmbus_child_device_unregister - Remove the specified child device
644 void vmbus_child_device_unregister(struct hv_device *device_obj)
647 * Kick off the process of unregistering the device.
648 * This will call vmbus_remove() and eventually vmbus_device_release()
650 device_unregister(&device_obj->device);
652 pr_info("child device %s unregistered\n",
653 dev_name(&device_obj->device));
658 * VMBUS is an acpi enumerated device. Get the the IRQ information
662 static acpi_status vmbus_walk_resources(struct acpi_resource *res, void *irq)
665 if (res->type == ACPI_RESOURCE_TYPE_IRQ) {
666 struct acpi_resource_irq *irqp;
667 irqp = &res->data.irq;
669 *((unsigned int *)irq) = irqp->interrupts[0];
675 static int vmbus_acpi_add(struct acpi_device *device)
679 hv_acpi_dev = device;
682 acpi_walk_resources(device->handle, METHOD_NAME__CRS,
683 vmbus_walk_resources, &irq);
685 if (ACPI_FAILURE(result)) {
686 complete(&probe_event);
689 complete(&probe_event);
693 static const struct acpi_device_id vmbus_acpi_device_ids[] = {
698 MODULE_DEVICE_TABLE(acpi, vmbus_acpi_device_ids);
700 static struct acpi_driver vmbus_acpi_driver = {
702 .ids = vmbus_acpi_device_ids,
704 .add = vmbus_acpi_add,
708 static int __init hv_acpi_init(void)
712 init_completion(&probe_event);
715 * Get irq resources first.
718 ret = acpi_bus_register_driver(&vmbus_acpi_driver);
723 t = wait_for_completion_timeout(&probe_event, 5*HZ);
734 ret = vmbus_bus_init(irq);
741 acpi_bus_unregister_driver(&vmbus_acpi_driver);
746 MODULE_LICENSE("GPL");
747 MODULE_VERSION(HV_DRV_VERSION);
748 module_param(vmbus_loglevel, int, S_IRUGO|S_IWUSR);
750 module_init(hv_acpi_init);