From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1030181Ab2KUUfc (ORCPT ); Wed, 21 Nov 2012 15:35:32 -0500 Received: from smtp-outbound-1.vmware.com ([208.91.2.12]:45296 "EHLO smtp-outbound-1.vmware.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S932066Ab2KUUf2 (ORCPT ); Wed, 21 Nov 2012 15:35:28 -0500 Subject: [PATCH 10/12] VMCI: guest side driver implementation. To: linux-kernel@vger.kernel.org, georgezhang@vmware.com, virtualization@lists.linux-foundation.org From: George Zhang Cc: pv-drivers@vmware.com, gregkh@linuxfoundation.org Date: Wed, 21 Nov 2012 12:35:27 -0800 Message-ID: <20121121203515.13252.32393.stgit@promb-2n-dhcp175.eng.vmware.com> In-Reply-To: <20121121202625.13252.86346.stgit@promb-2n-dhcp175.eng.vmware.com> References: <20121121202625.13252.86346.stgit@promb-2n-dhcp175.eng.vmware.com> User-Agent: StGit/0.15 MIME-Version: 1.0 Content-Type: text/plain; charset="utf-8" Content-Transfer-Encoding: 7bit Sender: linux-kernel-owner@vger.kernel.org List-ID: X-Mailing-List: linux-kernel@vger.kernel.org VMCI guest side driver code implementation. Signed-off-by: Dmitry Torokhov Signed-off-by: Andy King Signed-off-by: George Zhang --- drivers/misc/vmw_vmci/vmci_guest.c | 757 ++++++++++++++++++++++++++++++++++++ 1 files changed, 757 insertions(+), 0 deletions(-) create mode 100644 drivers/misc/vmw_vmci/vmci_guest.c diff --git a/drivers/misc/vmw_vmci/vmci_guest.c b/drivers/misc/vmw_vmci/vmci_guest.c new file mode 100644 index 0000000..bcbe8ab --- /dev/null +++ b/drivers/misc/vmw_vmci/vmci_guest.c @@ -0,0 +1,757 @@ +/* + * VMware VMCI Driver + * + * Copyright (C) 2012 VMware, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation version 2 and no later version. 
+ * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "vmci_common_int.h" +#include "vmci_datagram.h" +#include "vmci_doorbell.h" +#include "vmci_context.h" +#include "vmci_driver.h" +#include "vmci_event.h" + +#define VMCI_UTIL_NUM_RESOURCES 1 + +static bool vmci_disable_msi; +module_param_named(disable_msi, vmci_disable_msi, bool, 0); +MODULE_PARM_DESC(disable_msi, "Disable MSI use in driver - (default=0)"); + +static bool vmci_disable_msix; +module_param_named(disable_msix, vmci_disable_msix, bool, 0); +MODULE_PARM_DESC(disable_msix, "Disable MSI-X use in driver - (default=0)"); + +static u32 ctx_update_sub_id = VMCI_INVALID_ID; +static u32 vm_context_id = VMCI_INVALID_ID; + +struct vmci_guest_device { + struct device *dev; /* PCI device we are attached to */ + void __iomem *iobase; + + unsigned int irq; + unsigned int intr_type; + bool exclusive_vectors; + struct msix_entry msix_entries[VMCI_MAX_INTRS]; + + struct tasklet_struct datagram_tasklet; + struct tasklet_struct bm_tasklet; + + void *data_buffer; + void *notification_bitmap; +}; + +/* vmci_dev singleton device and supporting data*/ +static struct vmci_guest_device *vmci_dev_g; +static DEFINE_SPINLOCK(vmci_dev_spinlock); + +static atomic_t vmci_num_guest_devices = ATOMIC_INIT(0); + +bool vmci_guest_code_active(void) +{ + return atomic_read(&vmci_num_guest_devices) != 0; +} + +u32 vmci_get_vm_context_id(void) +{ + if (vm_context_id == VMCI_INVALID_ID) { + u32 result; + struct vmci_datagram get_cid_msg; + get_cid_msg.dst = + vmci_make_handle(VMCI_HYPERVISOR_CONTEXT_ID, + VMCI_GET_CONTEXT_ID); + get_cid_msg.src = VMCI_ANON_SRC_HANDLE; + get_cid_msg.payload_size = 0; + result 
= vmci_send_datagram(&get_cid_msg); + if (result >= 0) + vm_context_id = result; + } + return vm_context_id; +} + +/* + * VM to hypervisor call mechanism. We use the standard VMware naming + * convention since shared code is calling this function as well. + */ +int vmci_send_datagram(struct vmci_datagram *dg) +{ + unsigned long flags; + int result; + + /* Check args. */ + if (dg == NULL) + return VMCI_ERROR_INVALID_ARGS; + + /* + * Need to acquire spinlock on the device because the datagram + * data may be spread over multiple pages and the monitor may + * interleave device user rpc calls from multiple + * VCPUs. Acquiring the spinlock precludes that + * possibility. Disabling interrupts to avoid incoming + * datagrams during a "rep out" and possibly landing up in + * this function. + */ + spin_lock_irqsave(&vmci_dev_spinlock, flags); + + if (vmci_dev_g) { + iowrite8_rep(vmci_dev_g->iobase + VMCI_DATA_OUT_ADDR, + dg, VMCI_DG_SIZE(dg)); + result = ioread32(vmci_dev_g->iobase + VMCI_RESULT_LOW_ADDR); + } else { + result = VMCI_ERROR_UNAVAILABLE; + } + + spin_unlock_irqrestore(&vmci_dev_spinlock, flags); + + return result; +} +EXPORT_SYMBOL_GPL(vmci_send_datagram); + +/* + * Gets called with the new context id if updated or resumed. + * Context id. + */ +static void vmci_guest_cid_update(u32 sub_id, + const struct vmci_event_data *event_data, + void *client_data) +{ + const struct vmci_event_payld_ctx *ev_payload = + vmci_event_data_const_payload(event_data); + + if (sub_id != ctx_update_sub_id) { + pr_devel("Invalid subscriber (ID=0x%x).\n", sub_id); + return; + } + + if (!event_data || ev_payload->context_id == VMCI_INVALID_ID) { + pr_devel("Invalid event data.\n"); + return; + } + + pr_devel("Updating context from (ID=0x%x) to (ID=0x%x) on event (type=%d).\n", + vm_context_id, ev_payload->context_id, event_data->event); + + vm_context_id = ev_payload->context_id; +} + +/* + * Verify that the host supports the hypercalls we need. 
+ * If it does not, there is currently no fallback and the check fails.
+ * The check sends a VMCI_RESOURCES_QUERY datagram asking about
+ * VMCI_GET_CONTEXT_ID.  Returns true if the host answers 0x01 (hypercall
+ * supported), false otherwise or if the query could not be allocated.
+ */
+static bool vmci_check_host_caps(struct pci_dev *pdev)
+{
+	bool result;
+	struct vmci_resource_query_msg *msg;
+	u32 msg_size = sizeof(struct vmci_resource_query_hdr) +
+				VMCI_UTIL_NUM_RESOURCES * sizeof(u32);
+	struct vmci_datagram *check_msg;
+
+	/*
+	 * Zeroed allocation: the whole buffer is written to the device, so
+	 * any field or padding not explicitly set below must not carry
+	 * stale heap contents.
+	 */
+	check_msg = kzalloc(msg_size, GFP_KERNEL);
+	if (!check_msg) {
+		dev_err(&pdev->dev, "%s: Insufficient memory.\n", __func__);
+		return false;
+	}
+
+	check_msg->dst = vmci_make_handle(VMCI_HYPERVISOR_CONTEXT_ID,
+					  VMCI_RESOURCES_QUERY);
+	check_msg->src = VMCI_ANON_SRC_HANDLE;
+	check_msg->payload_size = msg_size - VMCI_DG_HEADERSIZE;
+	msg = (struct vmci_resource_query_msg *)VMCI_DG_PAYLOAD(check_msg);
+
+	msg->num_resources = VMCI_UTIL_NUM_RESOURCES;
+	msg->resources[0] = VMCI_GET_CONTEXT_ID;
+
+	/* Checks that hyper calls are supported */
+	result = vmci_send_datagram(check_msg) == 0x01;
+	kfree(check_msg);
+
+	dev_dbg(&pdev->dev, "%s: Host capability check: %s.\n",
+		__func__, result ? "PASSED" : "FAILED");
+
+	/* We need the vector. There are no fallbacks. */
+	return result;
+}
+
+/*
+ * Reads datagrams from the data in port and dispatches them. We
+ * always start reading datagrams into only the first page of the
+ * datagram buffer. If the datagrams don't fit into one page, we
+ * use the maximum datagram buffer size for the remainder of the
+ * invocation. This is a simple heuristic for not penalizing
+ * small datagrams.
+ *
+ * This function assumes that it has exclusive access to the data
+ * in port for the duration of the call.
+ */
+static void vmci_dispatch_dgs(unsigned long data)
+{
+	struct vmci_guest_device *vmci_dev = (struct vmci_guest_device *)data;
+	u8 *dg_in_buffer = vmci_dev->data_buffer;
+	struct vmci_datagram *dg;
+	size_t dg_in_buffer_size = VMCI_MAX_DG_SIZE;
+	size_t current_dg_in_buffer_size = PAGE_SIZE;
+	size_t remaining_bytes;
+
+	BUILD_BUG_ON(VMCI_MAX_DG_SIZE < PAGE_SIZE);
+
+	/* First read: only one page worth of data. */
+	ioread8_rep(vmci_dev->iobase + VMCI_DATA_IN_ADDR,
+		    vmci_dev->data_buffer, current_dg_in_buffer_size);
+	dg = (struct vmci_datagram *)dg_in_buffer;
+	remaining_bytes = current_dg_in_buffer_size;
+
+	/*
+	 * Keep going while the current position holds a datagram, or while
+	 * more than a page of buffered data is still unexamined.
+	 */
+	while (dg->dst.resource != VMCI_INVALID_ID ||
+	       remaining_bytes > PAGE_SIZE) {
+		unsigned dg_in_size;
+
+		/*
+		 * When the input buffer spans multiple pages, a datagram can
+		 * start on any page boundary in the buffer.
+		 */
+		if (dg->dst.resource == VMCI_INVALID_ID) {
+			dg = (struct vmci_datagram *)roundup(
+				(uintptr_t)dg + 1, PAGE_SIZE);
+			remaining_bytes =
+				(size_t)(dg_in_buffer +
+					 current_dg_in_buffer_size -
+					 (u8 *) dg);
+			continue;
+		}
+
+		dg_in_size = VMCI_DG_SIZE_ALIGNED(dg);
+
+		if (dg_in_size <= dg_in_buffer_size) {
+			int result;
+
+			/*
+			 * If the remaining bytes in the datagram
+			 * buffer don't contain the complete
+			 * datagram, we first make sure we have enough
+			 * room for it and then we read the remainder
+			 * of the datagram and possibly any following
+			 * datagrams.
+			 */
+			if (dg_in_size > remaining_bytes) {
+				if (remaining_bytes !=
+				    current_dg_in_buffer_size) {
+
+					/*
+					 * We move the partial
+					 * datagram to the front and
+					 * read the remainder of the
+					 * datagram and possibly
+					 * following calls into the
+					 * following bytes.
+					 */
+					memmove(dg_in_buffer, dg_in_buffer +
+						current_dg_in_buffer_size -
+						remaining_bytes,
+						remaining_bytes);
+					dg = (struct vmci_datagram *)
+					    dg_in_buffer;
+				}
+
+				/* From now on use the full-size buffer. */
+				if (current_dg_in_buffer_size !=
+				    dg_in_buffer_size)
+					current_dg_in_buffer_size =
+					    dg_in_buffer_size;
+
+				ioread8_rep(vmci_dev->iobase +
+						VMCI_DATA_IN_ADDR,
+					vmci_dev->data_buffer +
+						remaining_bytes,
+					current_dg_in_buffer_size -
+						remaining_bytes);
+			}
+
+			/*
+			 * We special case event datagrams from the
+			 * hypervisor.
+			 */
+			if (dg->src.context == VMCI_HYPERVISOR_CONTEXT_ID &&
+			    dg->dst.resource == VMCI_EVENT_HANDLER) {
+				result = vmci_event_dispatch(dg);
+			} else {
+				result = vmci_datagram_invoke_guest_handler(dg);
+			}
+			if (result < VMCI_SUCCESS)
+				dev_dbg(vmci_dev->dev,
+					"Datagram with resource (ID=0x%x) failed (err=%d).\n",
+					 dg->dst.resource, result);
+
+			/* On to the next datagram. */
+			dg = (struct vmci_datagram *)((u8 *) dg +
+						      dg_in_size);
+		} else {
+			size_t bytes_to_skip;
+
+			/*
+			 * Datagram doesn't fit in datagram buffer of maximal
+			 * size. We drop it.
+			 */
+			dev_dbg(vmci_dev->dev,
+				"Failed to receive datagram (size=%u bytes).\n",
+				 dg_in_size);
+
+			bytes_to_skip = dg_in_size - remaining_bytes;
+			if (current_dg_in_buffer_size != dg_in_buffer_size)
+				current_dg_in_buffer_size = dg_in_buffer_size;
+
+			/*
+			 * Keep refilling the buffer until the tail of the
+			 * dropped datagram lies within it.
+			 */
+			for (;;) {
+				ioread8_rep(vmci_dev->iobase +
+						VMCI_DATA_IN_ADDR,
+					vmci_dev->data_buffer,
+					current_dg_in_buffer_size);
+				if (bytes_to_skip <= current_dg_in_buffer_size)
+					break;
+
+				bytes_to_skip -= current_dg_in_buffer_size;
+			}
+			dg = (struct vmci_datagram *)(dg_in_buffer +
+						      bytes_to_skip);
+		}
+
+		remaining_bytes =
+		    (size_t) (dg_in_buffer + current_dg_in_buffer_size -
+			      (u8 *) dg);
+
+		if (remaining_bytes < VMCI_DG_HEADERSIZE) {
+			/* Get the next batch of datagrams. */
+
+			ioread8_rep(vmci_dev->iobase + VMCI_DATA_IN_ADDR,
+				    vmci_dev->data_buffer,
+				    current_dg_in_buffer_size);
+			dg = (struct vmci_datagram *)dg_in_buffer;
+			remaining_bytes = current_dg_in_buffer_size;
+		}
+	}
+}
+
+/*
+ * Tasklet body for notifications: hands the notification bitmap to
+ * vmci_dbell_scan_notification_entries().  Does nothing (apart from a
+ * debug message) if the device has no bitmap.
+ */
+static void vmci_process_bitmap(unsigned long data)
+{
+	struct vmci_guest_device *dev = (struct vmci_guest_device *)data;
+
+	if (!dev->notification_bitmap) {
+		dev_dbg(dev->dev, "No bitmap present in %s.\n", __func__);
+		return;
+	}
+
+	vmci_dbell_scan_notification_entries(dev->notification_bitmap);
+}
+
+/*
+ * Enable MSI-X. Try exclusive vectors first (VMCI_MAX_INTRS of them); if
+ * pci_enable_msix() reports that fewer are available, retry with a single
+ * shared vector.  Returns 0 on success; exclusive_vectors is set only when
+ * all VMCI_MAX_INTRS vectors were granted.
+ */
+static int vmci_enable_msix(struct pci_dev *pdev,
+			    struct vmci_guest_device *vmci_dev)
+{
+	int i;
+	int result;
+
+	for (i = 0; i < VMCI_MAX_INTRS; ++i) {
+		vmci_dev->msix_entries[i].entry = i;
+		vmci_dev->msix_entries[i].vector = i;
+	}
+
+	result = pci_enable_msix(pdev, vmci_dev->msix_entries, VMCI_MAX_INTRS);
+	if (result == 0)
+		vmci_dev->exclusive_vectors = true;
+	else if (result > 0)
+		result = pci_enable_msix(pdev, vmci_dev->msix_entries, 1);
+
+	return result;
+}
+
+/*
+ * Interrupt handler for legacy or MSI interrupt, or for first MSI-X
+ * interrupt (vector VMCI_INTR_DATAGRAM).
+ */
+static irqreturn_t vmci_interrupt(int irq, void *_dev)
+{
+	struct vmci_guest_device *dev = _dev;
+
+	/*
+	 * If we are using MSI-X with exclusive vectors then we simply schedule
+	 * the datagram tasklet, since we know the interrupt was meant for us.
+	 * Otherwise we must read the ICR to determine what to do.
+	 */
+
+	if (dev->intr_type == VMCI_INTR_TYPE_MSIX && dev->exclusive_vectors) {
+		tasklet_schedule(&dev->datagram_tasklet);
+	} else {
+		unsigned int icr;
+
+		/* Acknowledge interrupt and determine what needs doing. */
+		icr = ioread32(dev->iobase + VMCI_ICR_ADDR);
+		/* No cause bits, or all ones: not treated as our interrupt. */
+		if (icr == 0 || icr == ~0)
+			return IRQ_NONE;
+
+		if (icr & VMCI_ICR_DATAGRAM) {
+			tasklet_schedule(&dev->datagram_tasklet);
+			icr &= ~VMCI_ICR_DATAGRAM;
+		}
+
+		if (icr & VMCI_ICR_NOTIFICATION) {
+			tasklet_schedule(&dev->bm_tasklet);
+			icr &= ~VMCI_ICR_NOTIFICATION;
+		}
+
+		/* Whatever is left are cause bits this driver doesn't know. */
+		if (icr != 0)
+			dev_warn(dev->dev,
+				 "Ignoring unknown interrupt cause (%d).\n",
+				 icr);
+	}
+
+	return IRQ_HANDLED;
+}
+
+/*
+ * Interrupt handler for MSI-X interrupt vector VMCI_INTR_NOTIFICATION,
+ * which is for the notification bitmap. Will only get called if we are
+ * using MSI-X with exclusive vectors.
+ */
+static irqreturn_t vmci_interrupt_bm(int irq, void *_dev)
+{
+	struct vmci_guest_device *dev = _dev;
+
+	/* For MSI-X we can just assume it was meant for us. */
+	tasklet_schedule(&dev->bm_tasklet);
+
+	return IRQ_HANDLED;
+}
+
+/*
+ * Most of the initialization at module load time is done here.
+ *
+ * Enables and maps the PCI device, allocates the datagram buffer (and the
+ * notification bitmap if the device offers that capability), publishes the
+ * device as the global singleton, checks host capabilities, subscribes to
+ * context id updates, and finally sets up and unmasks interrupts.
+ *
+ * Returns 0 on success or a negative errno; on failure everything that is
+ * not a managed (pcim_/devm_) resource is released again.
+ */
+static int vmci_guest_probe_device(struct pci_dev *pdev,
+				   const struct pci_device_id *id)
+{
+	struct vmci_guest_device *vmci_dev;
+	void __iomem *iobase;
+	unsigned int capabilities;
+	unsigned long cmd;
+	int vmci_err;
+	int error;
+
+	dev_dbg(&pdev->dev, "Probing for vmci/PCI guest device.\n");
+
+	error = pcim_enable_device(pdev);
+	if (error) {
+		dev_err(&pdev->dev,
+			"Failed to enable VMCI device: %d\n", error);
+		return error;
+	}
+
+	error = pcim_iomap_regions(pdev, 1 << 0, MODULE_NAME);
+	if (error) {
+		dev_err(&pdev->dev, "Failed to reserve/map IO regions.\n");
+		return error;
+	}
+
+	iobase = pcim_iomap_table(pdev)[0];
+
+	dev_info(&pdev->dev, "Found VMCI PCI device at %#lx, irq %u.\n",
+		 (unsigned long)iobase, pdev->irq);
+
+	vmci_dev = devm_kzalloc(&pdev->dev, sizeof(*vmci_dev), GFP_KERNEL);
+	if (!vmci_dev) {
+		dev_err(&pdev->dev,
+			"Can't allocate memory for VMCI device.\n");
+		return -ENOMEM;
+	}
+
+	vmci_dev->dev = &pdev->dev;
+	vmci_dev->intr_type = VMCI_INTR_TYPE_INTX;
+	vmci_dev->exclusive_vectors = false;
+	vmci_dev->iobase = iobase;
+
+	tasklet_init(&vmci_dev->datagram_tasklet,
+		     vmci_dispatch_dgs, (unsigned long)vmci_dev);
+	tasklet_init(&vmci_dev->bm_tasklet,
+		     vmci_process_bitmap, (unsigned long)vmci_dev);
+
+	vmci_dev->data_buffer = vmalloc(VMCI_MAX_DG_SIZE);
+	if (!vmci_dev->data_buffer) {
+		dev_err(&pdev->dev,
+			"Can't allocate memory for datagram buffer.\n");
+		return -ENOMEM;
+	}
+
+	pci_set_master(pdev);	/* To enable queue_pair functionality. */
+
+	/*
+	 * Verify that the VMCI Device supports the capabilities that
+	 * we need. If the device is missing capabilities that we would
+	 * like to use, check for fallback capabilities and use those
+	 * instead (so we can run a new VM on old hosts). Fail the load if
+	 * a required capability is missing and there is no fallback.
+	 *
+	 * Right now, we need datagrams. There are no fallbacks.
+	 */
+	capabilities = ioread32(vmci_dev->iobase + VMCI_CAPS_ADDR);
+	if (!(capabilities & VMCI_CAPS_DATAGRAM)) {
+		dev_err(&pdev->dev, "Device does not support datagrams.\n");
+		error = -ENXIO;
+		goto err_free_data_buffer;
+	}
+
+	/*
+	 * If the hardware supports notifications, we will use that as
+	 * well.
+	 */
+	if (capabilities & VMCI_CAPS_NOTIFICATIONS) {
+		vmci_dev->notification_bitmap = vmalloc(PAGE_SIZE);
+		if (!vmci_dev->notification_bitmap) {
+			dev_warn(&pdev->dev,
+				 "Unable to allocate notification bitmap.\n");
+			/*
+			 * Without a bitmap we must not announce or use the
+			 * capability: everything below keys off this bit.
+			 */
+			capabilities &= ~VMCI_CAPS_NOTIFICATIONS;
+		} else {
+			memset(vmci_dev->notification_bitmap, 0, PAGE_SIZE);
+		}
+	}
+
+	dev_info(&pdev->dev, "Using capabilities 0x%x.\n", capabilities);
+
+	/* Let the host know which capabilities we intend to use. */
+	iowrite32(capabilities, vmci_dev->iobase + VMCI_CAPS_ADDR);
+
+	/* Set up global device so that we can start sending datagrams */
+	spin_lock_irq(&vmci_dev_spinlock);
+	vmci_dev_g = vmci_dev;
+	spin_unlock_irq(&vmci_dev_spinlock);
+
+	/*
+	 * Register notification bitmap with device if that capability is
+	 * used.
+	 */
+	if (capabilities & VMCI_CAPS_NOTIFICATIONS) {
+		struct page *page =
+			vmalloc_to_page(vmci_dev->notification_bitmap);
+		unsigned long bitmap_ppn = page_to_pfn(page);
+		if (!vmci_dbell_register_notification_bitmap(bitmap_ppn)) {
+			dev_warn(&pdev->dev,
+				 "VMCI device unable to register notification bitmap with PPN 0x%x.\n",
+				 (u32) bitmap_ppn);
+			/* error still holds 0 from the last successful call. */
+			error = -ENXIO;
+			goto err_remove_vmci_dev_g;
+		}
+	}
+
+	/* Check host capabilities. */
+	if (!vmci_check_host_caps(pdev)) {
+		error = -ENXIO;
+		goto err_remove_bitmap;
+	}
+
+	/* Enable device. */
+
+	/*
+	 * We subscribe to the VMCI_EVENT_CTX_ID_UPDATE here so we can
+	 * update the internal context id when needed.
+	 */
+	vmci_err = vmci_event_subscribe(VMCI_EVENT_CTX_ID_UPDATE,
+					vmci_guest_cid_update, NULL,
+					&ctx_update_sub_id);
+	if (vmci_err < VMCI_SUCCESS)
+		dev_warn(&pdev->dev,
+			 "Failed to subscribe to event (type=%d): %d\n",
+			 VMCI_EVENT_CTX_ID_UPDATE, vmci_err);
+
+	/*
+	 * Enable interrupts.  Try MSI-X first, then MSI, and then fallback on
+	 * legacy interrupts.
+	 */
+	if (!vmci_disable_msix && !vmci_enable_msix(pdev, vmci_dev)) {
+		vmci_dev->intr_type = VMCI_INTR_TYPE_MSIX;
+		vmci_dev->irq = vmci_dev->msix_entries[0].vector;
+	} else if (!vmci_disable_msi && !pci_enable_msi(pdev)) {
+		vmci_dev->intr_type = VMCI_INTR_TYPE_MSI;
+		vmci_dev->irq = pdev->irq;
+	} else {
+		vmci_dev->intr_type = VMCI_INTR_TYPE_INTX;
+		vmci_dev->irq = pdev->irq;
+	}
+
+	/*
+	 * Request IRQ for legacy or MSI interrupts, or for first
+	 * MSI-X vector.
+	 */
+	error = request_irq(vmci_dev->irq, vmci_interrupt, IRQF_SHARED,
+			    MODULE_NAME, vmci_dev);
+	if (error) {
+		dev_err(&pdev->dev, "Irq %u in use: %d\n",
+			vmci_dev->irq, error);
+		goto err_disable_msi;
+	}
+
+	/*
+	 * For MSI-X with exclusive vectors we need to request an
+	 * interrupt for each vector so that we get a separate
+	 * interrupt handler routine.  This allows us to distinguish
+	 * between the vectors.
+	 */
+	if (vmci_dev->exclusive_vectors) {
+		error = request_irq(vmci_dev->msix_entries[1].vector,
+				    vmci_interrupt_bm, 0, MODULE_NAME,
+				    vmci_dev);
+		if (error) {
+			dev_err(&pdev->dev,
+				"Failed to allocate irq %u: %d\n",
+				vmci_dev->msix_entries[1].vector, error);
+			goto err_free_irq;
+		}
+	}
+
+	dev_dbg(&pdev->dev, "Registered device.\n");
+
+	atomic_inc(&vmci_num_guest_devices);
+
+	/* Enable specific interrupt bits. */
+	cmd = VMCI_IMR_DATAGRAM;
+	if (capabilities & VMCI_CAPS_NOTIFICATIONS)
+		cmd |= VMCI_IMR_NOTIFICATION;
+	iowrite32(cmd, vmci_dev->iobase + VMCI_IMR_ADDR);
+
+	/* Enable interrupts. */
+	iowrite32(VMCI_CONTROL_INT_ENABLE,
+		  vmci_dev->iobase + VMCI_CONTROL_ADDR);
+
+	pci_set_drvdata(pdev, vmci_dev);
+	return 0;
+
+err_free_irq:
+	/* dev_id must be the same cookie that was passed to request_irq(). */
+	free_irq(vmci_dev->irq, vmci_dev);
+	tasklet_kill(&vmci_dev->datagram_tasklet);
+	tasklet_kill(&vmci_dev->bm_tasklet);
+
+err_disable_msi:
+	if (vmci_dev->intr_type == VMCI_INTR_TYPE_MSIX)
+		pci_disable_msix(pdev);
+	else if (vmci_dev->intr_type == VMCI_INTR_TYPE_MSI)
+		pci_disable_msi(pdev);
+
+	vmci_err = vmci_event_unsubscribe(ctx_update_sub_id);
+	if (vmci_err < VMCI_SUCCESS)
+		dev_warn(&pdev->dev,
+			 "Failed to unsubscribe from event (type=%d) with subscriber (ID=0x%x): %d\n",
+			 VMCI_EVENT_CTX_ID_UPDATE, ctx_update_sub_id, vmci_err);
+
+err_remove_bitmap:
+	if (vmci_dev->notification_bitmap) {
+		/* Reset the device before freeing the bitmap page. */
+		iowrite32(VMCI_CONTROL_RESET,
+			  vmci_dev->iobase + VMCI_CONTROL_ADDR);
+		vfree(vmci_dev->notification_bitmap);
+	}
+
+err_remove_vmci_dev_g:
+	spin_lock_irq(&vmci_dev_spinlock);
+	vmci_dev_g = NULL;
+	spin_unlock_irq(&vmci_dev_spinlock);
+
+err_free_data_buffer:
+	vfree(vmci_dev->data_buffer);
+
+	/* The rest are managed resources and will be freed by PCI core */
+	return error;
+}
+
+/*
+ * Undoes vmci_guest_probe_device(): unpublishes the global device, resets
+ * the hardware, releases the IRQ(s) and MSI/MSI-X, stops the tasklets and
+ * frees the vmalloc'ed buffers.
+ */
+static void vmci_guest_remove_device(struct pci_dev *pdev)
+{
+	struct vmci_guest_device *vmci_dev = pci_get_drvdata(pdev);
+	int vmci_err;
+
+	dev_dbg(&pdev->dev, "Removing device\n");
+
+	atomic_dec(&vmci_num_guest_devices);
+
+	vmci_qp_guest_endpoints_exit();
+
+	vmci_err = vmci_event_unsubscribe(ctx_update_sub_id);
+	if (vmci_err < VMCI_SUCCESS)
+		dev_warn(&pdev->dev,
+			 "Failed to unsubscribe from event (type=%d) with subscriber (ID=0x%x): %d\n",
+			 VMCI_EVENT_CTX_ID_UPDATE, ctx_update_sub_id, vmci_err);
+
+	/* From here on vmci_send_datagram() reports VMCI_ERROR_UNAVAILABLE. */
+	spin_lock_irq(&vmci_dev_spinlock);
+	vmci_dev_g = NULL;
+	spin_unlock_irq(&vmci_dev_spinlock);
+
+	dev_dbg(&pdev->dev, "Resetting vmci device\n");
+	iowrite32(VMCI_CONTROL_RESET, vmci_dev->iobase + VMCI_CONTROL_ADDR);
+
+	/*
+	 * Free IRQ and then disable MSI/MSI-X as appropriate.  For
+	 * MSI-X, we might have multiple vectors, each with their own
+	 * IRQ, which we must free too.
+	 */
+	free_irq(vmci_dev->irq, vmci_dev);
+	if (vmci_dev->intr_type == VMCI_INTR_TYPE_MSIX) {
+		if (vmci_dev->exclusive_vectors)
+			free_irq(vmci_dev->msix_entries[1].vector, vmci_dev);
+		pci_disable_msix(pdev);
+	} else if (vmci_dev->intr_type == VMCI_INTR_TYPE_MSI) {
+		pci_disable_msi(pdev);
+	}
+
+	/* No handler can schedule the tasklets any more; wait them out. */
+	tasklet_kill(&vmci_dev->datagram_tasklet);
+	tasklet_kill(&vmci_dev->bm_tasklet);
+
+	if (vmci_dev->notification_bitmap) {
+		/*
+		 * The device reset above cleared the bitmap state of the
+		 * device, so we can safely free it here.
+		 */
+
+		vfree(vmci_dev->notification_bitmap);
+	}
+
+	vfree(vmci_dev->data_buffer);
+
+	/* The rest are managed resources and will be freed by PCI core */
+}
+
+/* PCI ids this driver binds to: the VMware VMCI device only. */
+static DEFINE_PCI_DEVICE_TABLE(vmci_ids) = {
+	{ PCI_DEVICE(PCI_VENDOR_ID_VMWARE, PCI_DEVICE_ID_VMWARE_VMCI), },
+	{ 0 },
+};
+MODULE_DEVICE_TABLE(pci, vmci_ids);
+
+static struct pci_driver vmci_guest_driver = {
+	.name		= MODULE_NAME,
+	.id_table	= vmci_ids,
+	.probe		= vmci_guest_probe_device,
+	.remove		= vmci_guest_remove_device,
+};
+
+/* Registers the guest PCI driver; returns pci_register_driver()'s result. */
+int __init vmci_guest_init(void)
+{
+	return pci_register_driver(&vmci_guest_driver);
+}
+
+/* Unregisters the guest PCI driver. */
+void __exit vmci_guest_exit(void)
+{
+	pci_unregister_driver(&vmci_guest_driver);
+}